-
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
#include <vector>
#include <string>
#include <algorithm>
#include "utils/ms_utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"

namespace mindspore {
namespace kernel {
void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode,
                              const std::vector<size_t> &src_shape, const std::vector<size_t> &kernel_size,
                              const std::vector<int> &stride, std::vector<int> *padding_l,
                              std::vector<int> *padding_r, const std::vector<int> &dilation) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  if (src_shape.size() < 2) {
    MS_LOG(EXCEPTION) << "Set pad only supports src dim >= 2!";
  }
  // The last two dimensions of src_shape are the spatial (height, width) dimensions.
  std::vector<int> weight_height;
  weight_height.emplace_back(src_shape[src_shape.size() - 2]);
  weight_height.emplace_back(src_shape[src_shape.size() - 1]);
  MS_LOG(INFO) << "pad mode: " << pad_mode;
  if (pad_mode == PAD_MODE_LOWER_SAME || pad_mode == PAD_MODE_UPPER_SAME) {
    // SAME padding: pad so the output size is ceil(input / stride).
    for (size_t i = 0; i < weight_height.size(); ++i) {
      auto wh = weight_height[i];
      int out = (wh + stride[i] - 1) / stride[i];
      int effective_k = (SizeToInt(kernel_size[i]) - 1) * dilation[i] + 1;
      int pad_along = std::max(0, (out - 1) * stride[i] + effective_k - wh);
      int pad = pad_along / 2;
      padding_l->emplace_back(pad);
      padding_r->emplace_back(pad_along - pad);
    }
  } else if (pad_mode == PAD_MODE_LOWER_VALID || pad_mode == PAD_MODE_UPPER_VALID) {
    // VALID padding: no padding on either side.
    MS_LOG(INFO) << "pad valid";
    padding_l->emplace_back(0);
    padding_l->emplace_back(0);
    padding_r->emplace_back(0);
    padding_r->emplace_back(0);
  } else {
    // Explicit padding: read the pad_list attribute (top, bottom, left, right).
    std::vector<int> pad;
    std::vector<int64_t> pad_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, PAD_LIST);
    (void)std::transform(pad_me.begin(), pad_me.end(), std::back_inserter(pad),
                         [](const int64_t &value) { return static_cast<int>(value); });
    padding_l->emplace_back(pad[0]);
    padding_l->emplace_back(pad[2]);
    padding_r->emplace_back(pad[1]);
    padding_r->emplace_back(pad[3]);
  }
}

bool MKLCPUKernel::BinaryBroadCast(std::vector<size_t> *src0_shape, std::vector<size_t> *src1_shape,
                                   std::vector<size_t> *dst_shape) {
  MS_EXCEPTION_IF_NULL(src0_shape);
  MS_EXCEPTION_IF_NULL(src1_shape);
  MS_EXCEPTION_IF_NULL(dst_shape);
  bool need_swap = false;
  if (dst_shape->size() == 0) {
    // Treat scalars as 1-element, 1-D tensors.
    dst_shape->emplace_back(1);
    src0_shape->emplace_back(1);
    src1_shape->emplace_back(1);
  }
  MS_LOG(DEBUG) << "Binary broadcast in: src0: " << *src0_shape << " src1: " << *src1_shape
                << " dst: " << *dst_shape;
  if (src0_shape->size() != dst_shape->size()) {
    // Left-pad src0_shape with 1s until its rank matches dst_shape.
    need_swap = true;
    for (size_t i = src0_shape->size(); i < dst_shape->size(); ++i) {
      src0_shape->insert(src0_shape->begin(), 1);
    }
  } else if (src1_shape->size() != dst_shape->size()) {
    // Left-pad src1_shape with 1s until its rank matches dst_shape.
    for (size_t i = src1_shape->size(); i < dst_shape->size(); ++i) {
      src1_shape->insert(src1_shape->begin(), 1);
    }
  }
  if (src0_shape->size() == src1_shape->size()) {
    bool visit_src0 = false;
    bool visit_src1 = false;
    for (size_t i = 0; i < src0_shape->size(); ++i) {
      if (src0_shape->at(i) != src1_shape->at(i)) {
        // Where the two shapes differ, exactly one side must be 1, and the
        // broadcast side must be the same across all dimensions.
        if (src0_shape->at(i) == 1 && !visit_src1) {
          need_swap = true;
          visit_src0 = true;
        } else if (src1_shape->at(i) == 1 && !visit_src0) {
          need_swap = false;
          visit_src1 = true;
        } else {
          MS_LOG(EXCEPTION) << "Invalid broadcast! " << *src0_shape << " vs " << *src1_shape;
        }
      }
    }
  } else {
    MS_LOG(EXCEPTION) << "Invalid broadcast! src0: " << *src0_shape << " src1: " << *src1_shape
                      << " dst: " << *dst_shape;
  }
  MS_LOG(DEBUG) << "Binary broadcast out: src0: " << *src0_shape << " src1: " << *src1_shape
                << " dst: " << *dst_shape;
  return need_swap;
}

dnnl::memory::format_tag MKLCPUKernel::GetDefaultFormatTag(const dnnl::memory::dims &dims) const {
  dnnl::memory::format_tag mem_tag;
  auto dim_size = dims.size();
  // Choose the plain row-major tag that matches the rank.
  if (dim_size == 5) {
    mem_tag = dnnl::memory::format_tag::abcde;
  } else if (dim_size == 4) {
    mem_tag = dnnl::memory::format_tag::abcd;
  } else if (dim_size == 3) {
    mem_tag = dnnl::memory::format_tag::abc;
  } else if (dim_size == 2) {
    mem_tag = dnnl::memory::format_tag::ab;
  } else if (dim_size == 1) {
    mem_tag = dnnl::memory::format_tag::a;
  } else {
    MS_LOG(EXCEPTION) << "Kernel dims invalid " << dim_size;
  }
  return mem_tag;
}

dnnl::memory::desc MKLCPUKernel::GetDefaultMemDesc(const std::vector<size_t> &shape) {
  dnnl::memory::dims dims;
  if (shape.size() == 0) {
    dims.insert(dims.end(), 1);
  } else {
    dims.insert(dims.end(), shape.begin(), shape.end());
  }
  dnnl::memory::format_tag mem_tag = GetDefaultFormatTag(dims);
  dnnl::memory::desc mem_desc(dims, dnnl::memory::data_type::f32, mem_tag);
  return mem_desc;
}

void MKLCPUKernel::AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc) {
  arguments_[arg_key] = MKLKernelEngine::Get().CreateMemory(mem_desc, alloc);
}

void MKLCPUKernel::SetArgumentHandle(int arg_key, void *ptr) {
  auto arg_iter = arguments_.find(arg_key);
  if (arg_iter != arguments_.end()) {
    arg_iter->second.set_data_handle(ptr);
  }
}

void MKLCPUKernel::ExecutePrimitive() { MKLKernelEngine::Get().Execute(primitive_, arguments_); }

void MKLCPUKernel::Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem) {
  MKLKernelEngine::Get().Reorder(src_mem, dst_mem);
}
}  // namespace kernel
}  // namespace mindspore
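A minimal, self-contained sketch of the SAME-padding arithmetic that GetPadding applies per spatial dimension; the file name and the concrete numbers are hypothetical and not part of the kernel:

// same_padding_sketch.cc -- hypothetical demo of the SAME-padding formula
// used by MKLCPUKernel::GetPadding above.
#include <algorithm>
#include <cstdio>

int main() {
  // Assumed example: input height 7, kernel 3, stride 2, dilation 1.
  int wh = 7, kernel = 3, stride = 2, dilation = 1;
  int out = (wh + stride - 1) / stride;                                // ceil(7 / 2) = 4
  int effective_k = (kernel - 1) * dilation + 1;                       // 3
  int pad_along = std::max(0, (out - 1) * stride + effective_k - wh);  // 2
  int pad_l = pad_along / 2;                                           // 1
  int pad_r = pad_along - pad_l;                                       // 1
  std::printf("out=%d pad_l=%d pad_r=%d\n", out, pad_l, pad_r);
  return 0;
}

Padding 7 to 9 gives (9 - 3) / 2 + 1 = 4 output positions, matching the ceil(7 / 2) target; when pad_along is odd the extra element goes on the right.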
-
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h"
#include <string>
#include "utils/ms_utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
const int kMaxLSTMLayer = 100;
const int kOutputWorkSpaceIndex = 3;

void LstmCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
  output_size_list_[kOutputWorkSpaceIndex] = reserve_size_;
}

void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) {
#ifdef PLATFORM_86
  // Flush denormals to zero to avoid slow denormal arithmetic on x86.
  _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
  _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif
  MS_EXCEPTION_IF_NULL(kernel_node);
  using tag = dnnl::memory::format_tag;
  using dim = dnnl::memory::dims;
  CheckParam(kernel_node);
  auto eng = MKLKernelEngine::Get().engine();
  dnnl::stream s(eng);
  dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional;
  if (bidirectional_) {
    direction = dnnl::rnn_direction::bidirectional_concat;
  }
  dim src_dims = {seq_len_, batch_size_, input_size_};
  dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
  dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
  weights_dims_ = {num_layers_, num_directions_, input_size_, 4, hidden_size_};
  weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_};
  bias_dims_ = {num_layers_, num_directions_, 4, hidden_size_};
  dim dst_dims = {seq_len_, batch_size_, static_cast<int64_t>(hidden_size_) * num_directions_};
  dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
  dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
  dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc);
  dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc);
  dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc);
  dnnl::memory::desc bias_desc = formatted_md(bias_dims_, tag::ldgo);
  dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc);
  dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc);
  dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc);
  // Check whether the node carries the is_training attribute.
  if (!kernel_node->HasAttr(kAttrIsTraining)) {
    MS_LOG(WARNING) << "LSTM has no attr is_training";
  }
  is_training = GetValue<bool>(kernel_node->GetAttr(kAttrIsTraining));
  auto prop_kind = dnnl::prop_kind::forward_training;
  if (!is_training) {
    prop_kind = dnnl::prop_kind::forward_inference;
  }
  auto desc = std::make_shared<dnnl::lstm_forward::desc>(
    prop_kind, direction, src_desc, src_h_desc, src_c_desc, formatted_md(weights_dims_, tag::any),
    formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, dst_h_desc, dst_c_desc);
  prim_desc_ = dnnl::lstm_forward::primitive_desc(*desc, eng);
  primitive_ = std::make_shared<dnnl::lstm_forward>(prim_desc_);
  if (is_training) {
    reserve_size_ = static_cast<size_t>(prim_desc_.workspace_desc().get_size());
    AddArgument(DNNL_ARG_WORKSPACE, prim_desc_.workspace_desc());
  } else {
    reserve_size_ = 1;
  }
  AddArgument(DNNL_ARG_SRC_LAYER, src_desc);
  AddArgument(DNNL_ARG_SRC_ITER, src_h_desc);
  AddArgument(DNNL_ARG_SRC_ITER_C, src_c_desc);
  AddArgument(DNNL_ARG_WEIGHTS_LAYER, prim_desc_.weights_layer_desc());
  AddArgument(DNNL_ARG_WEIGHTS_ITER, prim_desc_.weights_iter_desc());
  AddArgument(DNNL_ARG_BIAS, bias_desc);
  AddArgument(DNNL_ARG_DST_LAYER, dst_desc);
  AddArgument(DNNL_ARG_DST_ITER, dst_h_desc);
  AddArgument(DNNL_ARG_DST_ITER_C, dst_c_desc);
}

void LstmCPUKernel::CheckParam(const CNodePtr &kernel_node) {
  std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
  std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
  std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
  bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
  input_size_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "input_size"));
  hidden_size_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "hidden_size"));
  num_layers_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num_layers"));
  has_bias_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias");
  batch_size_ = SizeToInt(src_shape[1]);
  seq_len_ = SizeToInt(src_shape[0]);
  num_directions_ = 1;
  if (bidirectional_) {
    num_directions_ = 2;
  }
  const int gate_size = 4 * hidden_size_;
  // The layer count must lie in (0, kMaxLSTMLayer].
  if (num_layers_ <= 0) {
    MS_LOG(EXCEPTION) << "Layers must be greater than zero!";
  }
  if (num_layers_ > kMaxLSTMLayer) {
    MS_LOG(EXCEPTION) << "Layers must be lower than 100!";
  }
  for (int i = 0; i < num_layers_; ++i) {
    weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
    weight_h_size_ += gate_size * hidden_size_;
  }
  weight_size_ = weight_size_ * num_directions_;
  weight_h_size_ = weight_h_size_ * num_directions_;
  if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) {
    MS_LOG(EXCEPTION) << "Error iteration shape!";
  }
  if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) {
    MS_LOG(EXCEPTION) << "Lstm only supports 3-D input!";
  }
}

bool LstmCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                           const std::vector<kernel::AddressPtr> & /*workspace*/,
                           const std::vector<kernel::AddressPtr> &outputs) {
  using dt = dnnl::memory::data_type;
  using tag = dnnl::memory::format_tag;
  auto eng = MKLKernelEngine::Get().engine();
  // User weights arrive in ldgoi layout; reorder them into the layout the
  // primitive chose (tag::any) at descriptor-creation time.
  auto user_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng);
  auto user_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng);
  auto weights_memory = dnnl::memory(prim_desc_.weights_layer_desc(), eng);
  auto weights_h_memory = dnnl::memory(prim_desc_.weights_iter_desc(), eng);
  user_weights_memory.set_data_handle(inputs[3]->addr);
  user_weights_h_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_);
  Reorder(&user_weights_memory, &weights_memory);
  Reorder(&user_weights_h_memory, &weights_h_memory);
  auto bias_memory = dnnl::memory(prim_desc_.bias_desc(), eng);
  if (has_bias_) {
    // Bias follows the layer and iteration weights in the flat weight buffer.
    bias_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_);
  } else {
    if (memset_s(bias_memory.get_data_handle(), prim_desc_.bias_desc().get_size(), 0,
                 prim_desc_.bias_desc().get_size())) {
      MS_LOG(EXCEPTION) << "Bias memset error";
    }
  }
  SetArgumentHandle(DNNL_ARG_SRC_LAYER, inputs[0]->addr);
  SetArgumentHandle(DNNL_ARG_SRC_ITER, inputs[1]->addr);
  SetArgumentHandle(DNNL_ARG_SRC_ITER_C, inputs[2]->addr);
  SetArgumentHandle(DNNL_ARG_WEIGHTS_LAYER, weights_memory.get_data_handle());
  SetArgumentHandle(DNNL_ARG_WEIGHTS_ITER, weights_h_memory.get_data_handle());
  SetArgumentHandle(DNNL_ARG_BIAS, bias_memory.get_data_handle());
  SetArgumentHandle(DNNL_ARG_DST_LAYER, outputs[0]->addr);
  SetArgumentHandle(DNNL_ARG_DST_ITER, outputs[1]->addr);
  SetArgumentHandle(DNNL_ARG_DST_ITER_C, outputs[2]->addr);
  if (is_training) {
    SetArgumentHandle(DNNL_ARG_WORKSPACE, outputs[3]->addr);
  }
  ExecutePrimitive();
  return true;
}
}  // namespace kernel
}  // namespace mindspore
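A hypothetical standalone sketch of how CheckParam accumulates the flat weight buffer sizes that Launch later uses as offsets into inputs[3] (the configuration values are made up for illustration):

// lstm_weight_size_sketch.cc -- hypothetical demo of the weight_size_/
// weight_h_size_ accumulation in LstmCPUKernel::CheckParam.
#include <cstdio>

int main() {
  // Assumed example configuration.
  int input_size = 32, hidden_size = 16, num_layers = 2;
  bool bidirectional = true;
  int num_directions = bidirectional ? 2 : 1;
  const int gate_size = 4 * hidden_size;  // An LSTM cell has 4 gates.

  int weight_size = 0, weight_h_size = 0;
  for (int i = 0; i < num_layers; ++i) {
    // Layer 0 consumes the raw input; deeper layers consume the (possibly
    // direction-concatenated) hidden state of the layer below.
    weight_size += gate_size * (i == 0 ? input_size : hidden_size * num_directions);
    weight_h_size += gate_size * hidden_size;
  }
  weight_size *= num_directions;
  weight_h_size *= num_directions;
  std::printf("weight_size=%d weight_h_size=%d\n", weight_size, weight_h_size);
  return 0;
}

These two totals explain the pointer arithmetic in Launch: the iteration weights start at offset weight_size_ and the bias at weight_size_ + weight_h_size_.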
-
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
template <typename T>
void L2NormalizeGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  CheckIONumber(kernel_node);
  for (size_t i = 0; i < INPUT_SIZE; i++) {
    input_shape_list_.emplace_back(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i));
  }
  auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
  CheckInputShape(output_shape);
  // Precompute the number of elements covered by one step of each dimension,
  // e.g. shape (2, 3, 4) gives (12, 4, 1).
  int output_dim_length = output_shape.size();
  dim_elem_num_list_.resize(output_dim_length, 1);
  for (int i = output_dim_length - 2; i >= 0; i--) {
    dim_elem_num_list_[i] = output_shape[i + 1] * dim_elem_num_list_[i + 1];
  }
  int axis = LongToInt(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "axis"));
  int input_dim_length = SizeToInt(input_shape_list_[0].size());
  axis_ = axis < 0 ? (axis + input_dim_length) : axis;
  epsilon_ = static_cast<T>(AnfAlgo::GetNodeAttr<float>(kernel_node, "epsilon"));
}

template <typename T>
bool L2NormalizeGradCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs,
                                         const std::vector<AddressPtr> &workspace,
                                         const std::vector<AddressPtr> &outputs) {
  auto input_x = reinterpret_cast<T *>(inputs[0]->addr);
  auto y = reinterpret_cast<T *>(inputs[1]->addr);
  auto dout = reinterpret_cast<T *>(inputs[2]->addr);
  auto output = reinterpret_cast<T *>(outputs[0]->addr);
  auto output_size = outputs[0]->size / sizeof(T);
  auto task = [&](size_t start, size_t end) {
    // For each output element, gather the vectors along axis_ and compute the gradient.
    for (size_t i = start; i < end; i++) {
      std::vector<size_t> high_dim_index;
      OneDimIndexToHighDimIndex(i, &high_dim_index);
      std::vector<T> input_x_vector;
      GetVector(&input_x_vector, high_dim_index, input_x);
      std::vector<T> dout_vector;
      GetVector(&dout_vector, high_dim_index, dout);
      std::vector<T> y_vector;
      GetVector(&y_vector, high_dim_index, y);
      GetOutput(input_x_vector, y_vector, dout_vector, high_dim_index, &output[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, output_size);
  return true;
}

template <typename T>
void L2NormalizeGradCPUKernel<T>::CheckInputShape(const std::vector<size_t> &output_shape) {
  // All inputs must have the same shape as the output.
  for (const auto &shape : input_shape_list_) {
    if (output_shape != shape) {
      MS_LOG(EXCEPTION) << "Input shape and output shape should be the same.";
    }
  }
  auto input_x_shape = input_shape_list_[0];
  // Reject inputs with an empty dimension.
  if (input_x_shape.size() != 0) {
    if (std::any_of(input_x_shape.begin(), input_x_shape.end(), [](size_t i) { return i == 0; })) {
      MS_LOG(EXCEPTION) << "L2NormalizeGradCPUKernel input is null.";
    }
  }
}

template <typename T>
void L2NormalizeGradCPUKernel<T>::CheckIONumber(const CNodePtr &kernel_node) {
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != INPUT_SIZE) {
    MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but L2NormalizeGradCPUKernel needs 3 inputs.";
  }
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != OUTPUT_SIZE) {
    MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but L2NormalizeGradCPUKernel needs 1 output.";
  }
}

template <typename T>
void L2NormalizeGradCPUKernel<T>::OneDimIndexToHighDimIndex(size_t one_dim_index,
                                                            std::vector<size_t> *high_dim_index) {
  for (const auto &item : dim_elem_num_list_) {
    high_dim_index->push_back(one_dim_index / item);
    one_dim_index %= item;
  }
}

template <typename T>
void L2NormalizeGradCPUKernel<T>::HighDimIndexToOneDimIndex(size_t *one_dim_index,
                                                            const std::vector<size_t> &high_dim_index) {
  *one_dim_index = 0;
  int len = high_dim_index.size();
  for (int i = 0; i < len; i++) {
    *one_dim_index += high_dim_index[i] * dim_elem_num_list_[i];
  }
}

template <typename T>
void L2NormalizeGradCPUKernel<T>::GetVector(std::vector<T> *x_vector, const std::vector<size_t> &high_dim_index,
                                            const T *x) {
  // Collect the elements of x along axis_, keeping the other indices fixed.
  auto x_shape = input_shape_list_[0];
  for (size_t i = 0; i < x_shape[axis_]; i++) {
    size_t oneDimIndex = 0;
    std::vector<size_t> tmp_high_dim_index = high_dim_index;
    tmp_high_dim_index[axis_] = i;
    HighDimIndexToOneDimIndex(&oneDimIndex, tmp_high_dim_index);
    x_vector->push_back(x[oneDimIndex]);
  }
}

template <typename T>
void L2NormalizeGradCPUKernel<T>::GetSumOfProduct(const std::vector<T> &x_vector, const std::vector<T> &y_vector,
                                                  T *ss) {
  // Pairwise (tree) reduction of sum(x[i] * y[i]) for better numerical stability.
  size_t len = x_vector.size();
  std::vector<T> tmp_vector(len);
  for (size_t i = 0; i < len; i++) {
    tmp_vector[i] = x_vector[i] * y_vector[i];
  }
  if (len % 2 == 1 && len > 1) {
    // Fold the trailing odd element into slot 0 so the halving loop sees an
    // even count (the len > 1 guard avoids double-counting a single element).
    tmp_vector[0] += tmp_vector[len - 1];
  }
  for (size_t stride = len / 2; stride > 0; stride >>= 1) {
    for (size_t i = 0; i < stride; i++) {
      tmp_vector[i] += tmp_vector[i + stride];
    }
    if (stride > 2 && stride % 2 == 1) {
      tmp_vector[0] += tmp_vector[stride - 1];
    }
  }
  *ss = tmp_vector[0];
}

template <typename T>
void L2NormalizeGradCPUKernel<T>::GetOutput(const std::vector<T> &input_x_vector, const std::vector<T> &y_vector,
                                            const std::vector<T> &dout_vector,
                                            const std::vector<size_t> &high_dim_index, T *output) {
  size_t axis_index = high_dim_index[axis_];
  T dout = dout_vector[axis_index];
  T y = y_vector[axis_index];
  T tmp_sum1;
  GetSumOfProduct(y_vector, dout_vector, &tmp_sum1);
  T tmp_sum2;
  GetSumOfProduct(input_x_vector, input_x_vector, &tmp_sum2);
  tmp_sum2 = sqrt(tmp_sum2);
  // Gradient of x / ||x||: (dout - y * <y, dout>) / max(||x||, epsilon).
  if (tmp_sum2 >= epsilon_) {
    *output = (dout - y * tmp_sum1) / tmp_sum2;
  } else {
    *output = (dout - y * tmp_sum1) / epsilon_;
  }
}
}  // namespace kernel
}  // namespace mindspore
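A hypothetical demo of the flat-index and multi-dimensional-index mapping that OneDimIndexToHighDimIndex and HighDimIndexToOneDimIndex implement above, using an assumed (2, 3, 4) shape:

// index_conversion_sketch.cc -- hypothetical round-trip of the index mapping
// used by L2NormalizeGradCPUKernel.
#include <cstdio>
#include <vector>

int main() {
  // For shape (2, 3, 4), each dimension step covers (12, 4, 1) elements.
  std::vector<size_t> shape = {2, 3, 4};
  std::vector<size_t> dim_elem_num(shape.size(), 1);
  for (int i = static_cast<int>(shape.size()) - 2; i >= 0; --i) {
    dim_elem_num[i] = shape[i + 1] * dim_elem_num[i + 1];
  }

  // Flat index 17 -> high-dimensional index (1, 1, 1).
  size_t flat = 17;
  std::vector<size_t> high;
  size_t rest = flat;
  for (auto step : dim_elem_num) {
    high.push_back(rest / step);
    rest %= step;
  }

  // And back: 1*12 + 1*4 + 1*1 = 17.
  size_t back = 0;
  for (size_t i = 0; i < high.size(); ++i) {
    back += high[i] * dim_elem_num[i];
  }
  std::printf("flat=%zu -> (%zu, %zu, %zu) -> %zu\n", flat, high[0], high[1], high[2], back);
  return 0;
}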
-
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/kernel_query.h"
#include <memory>
#include <algorithm>
#include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h"
#include "backend/kernel_compiler/host/host_kernel_metadata.h"
#include "backend/kernel_compiler/rts/rt_kernel_info.h"
#include "backend/kernel_compiler/hccl/hccl_kernel_metadata.h"
#include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h"
#include "backend/kernel_compiler/akg/akg_kernel_metadata.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/ms_context.h"
#include "utils/trace_base.h"

namespace mindspore {
namespace kernel {
namespace {
// Drop candidate kernel build infos whose input/output counts do not match the node.
void FilterInvalidKernelInfo(const CNodePtr &kernel_node,
                             std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
  MS_EXCEPTION_IF_NULL(kernel_info_list);
  MS_EXCEPTION_IF_NULL(kernel_node);
  size_t output_tensor_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  size_t input_tensor_num = AnfAlgo::GetInputTensorNum(kernel_node);
  std::vector<std::shared_ptr<kernel::KernelBuildInfo>> filtered_list;
  (void)std::copy_if(
    kernel_info_list->begin(), kernel_info_list->end(), std::back_inserter(filtered_list),
    [output_tensor_num, input_tensor_num](const std::shared_ptr<kernel::KernelBuildInfo> &kernel_build_info) {
      return kernel_build_info->GetOutputNum() == output_tensor_num &&
             kernel_build_info->GetInputNum() == input_tensor_num;
    });
  if (!filtered_list.empty()) {
    kernel_info_list->clear();
    (void)std::copy(filtered_list.begin(), filtered_list.end(), std::back_inserter(*kernel_info_list));
  } else {
    // No candidate matched; log why each one was rejected, then clear the list.
    MS_LOG(INFO) << "All kernel info in the list failed to match the kernel node.";
    for (size_t index = 0; index < kernel_info_list->size(); ++index) {
      std::ostringstream buffer;
      auto &kernel_info = kernel_info_list->at(index);
      MS_EXCEPTION_IF_NULL(kernel_info);
      if (kernel_info->GetOutputNum() != output_tensor_num) {
        buffer << "Kernel node's output size [" << output_tensor_num << "]"
               << " cannot match the kernel's output size [" << kernel_info->GetOutputNum() << "]";
      } else {
        buffer << "Kernel node's input size [" << input_tensor_num << "]"
               << " cannot match the kernel's input size [" << kernel_info->GetInputNum() << "]";
      }
      MS_LOG(INFO) << "kernel [ " << index << " ] :" << kernel_info->ToString() << buffer.str();
    }
    kernel_info_list->clear();
    MS_LOG(INFO) << "Node " << kernel_node->DebugString() << "'s output size : [" << output_tensor_num << "]"
                 << ", input size : [" << input_tensor_num << "] cannot match any kernel info!";
  }
}
}  // namespace

// Query all backends in priority order: TBE first, then AICPU, RT, HCCL, and Host.
void KernelQueryAll(const CNodePtr &kernel_node,
                    std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  MS_EXCEPTION_IF_NULL(kernel_info_list);
  std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
  TbeMetadataInfo(kernel_node, kernel_info_list);
  if (kernel_info_list->empty()) {
    AicpuMetadataInfo(kernel_node, kernel_info_list);
    if (!kernel_info_list->empty()) {
      MS_LOG(INFO) << "The node [" << kernel_node->DebugString()
                   << "] cannot find valid TBE kernel info, try to get aicpu kernel info";
      AnfAlgo::SetNodeAttr(kAttrIsAICPUKernel, MakeValue(true), kernel_node);
    }
  }
  if (kernel_info_list->empty()) {
    GetRtKelInfo(kernel_node, kernel_info_list);
  }
  if (kernel_info_list->empty()) {
    HcclMetadataInfo(kernel_node, kernel_info_list);
  }
  if (kernel_info_list->empty()) {
    HostMetadataInfo(kernel_node, kernel_info_list);
  }
  if (kernel_info_list->empty()) {
    MS_EXCEPTION(NotExistsError) << "Cannot find any available operator info for op [" << op_name << ", "
                                 << kernel_node->fullname_with_scope()
                                 << "]. Node DebugString:" << kernel_node->DebugString()
                                 << ", maybe the operator is not supported on the current platform.\ntrace: "
                                 << trace::DumpSourceLines(kernel_node);
  }
}

void KernelQuery(const CNodePtr &kernel_node,
                 std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list,
                 KernelType kernel_type) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  MS_EXCEPTION_IF_NULL(kernel_info_list);
  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);
  // ProdForceSeA is always selected from AKG.
  const PrimitivePtr kPrimProdForceSeA = std::make_shared<Primitive>("ProdForceSeA");
  if (IsPrimitiveCNode(kernel_node, kPrimProdForceSeA)) {
    kernel_type = KernelType::AKG_KERNEL;
  }
  switch (kernel_type) {
    case KernelType::AKG_KERNEL:
      AkgMetadataInfo(kernel_node, kernel_info_list);
      break;
    default:
      KernelQueryAll(kernel_node, kernel_info_list);
      break;
  }
  if (kernel_info_list->empty()) {
    MS_EXCEPTION(NotExistsError) << "Cannot find any available operator info for op ["
                                 << AnfAlgo::GetCNodeName(kernel_node) << ", "
                                 << kernel_node->fullname_with_scope()
                                 << "]. Node DebugString:" << kernel_node->DebugString()
                                 << ", maybe the operator is not supported on the current platform.\ntrace: "
                                 << trace::DumpSourceLines(kernel_node);
  }
  // Keep only the candidates whose I/O counts match the node.
  FilterInvalidKernelInfo(kernel_node, kernel_info_list);
}

void AICPUQuery(const CNodePtr &kernel_node,
                std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  MS_EXCEPTION_IF_NULL(kernel_info_list);
  kernel_info_list->clear();
  AicpuMetadataInfo(kernel_node, kernel_info_list);
  FilterInvalidKernelInfo(kernel_node, kernel_info_list);
}

// Check whether the selected build info is supported by AICPU.
bool IsSupportedByAICPU(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr &select_kernel_build_info) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  MS_EXCEPTION_IF_NULL(select_kernel_build_info);
  std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
  auto cnode = kernel_node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  AICPUQuery(cnode, &kernel_info_list);
  return std::any_of(kernel_info_list.begin(), kernel_info_list.end(),
                     [&select_kernel_build_info](const kernel::KernelBuildInfoPtr item) {
                       MS_EXCEPTION_IF_NULL(item);
                       return item->IsSimilarityKernelBuildInfo(*select_kernel_build_info);
                     });
}

// Check whether the selected build info is supported by AICore (TBE).
bool IsSupportedByAICore(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr &select_kernel_build_info) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  MS_EXCEPTION_IF_NULL(select_kernel_build_info);
  std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
  auto cnode = kernel_node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  TbeMetadataInfo(cnode, &kernel_info_list);
  return std::any_of(kernel_info_list.begin(), kernel_info_list.end(),
                     [&select_kernel_build_info](const kernel::KernelBuildInfoPtr item) {
                       MS_EXCEPTION_IF_NULL(item);
                       return *item == *select_kernel_build_info;
                     });
}
}  // namespace kernel
}  // namespace mindspore
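KernelQueryAll is a fallback chain: each provider is tried only if the previous one produced nothing. A hypothetical, self-contained reduction of that control flow (provider names and payloads are stand-ins, not the real metadata APIs):

// query_fallback_sketch.cc -- hypothetical illustration of the provider
// fallback chain in KernelQueryAll.
#include <cstdio>
#include <functional>
#include <string>
#include <utility>
#include <vector>

using Provider = std::function<void(std::vector<std::string> *)>;

int main() {
  // Each entry stands in for TbeMetadataInfo, AicpuMetadataInfo, GetRtKelInfo, ...
  std::vector<std::pair<std::string, Provider>> providers = {
    {"TBE", [](std::vector<std::string> *out) { (void)out; /* no match */ }},
    {"AICPU", [](std::vector<std::string> *out) { out->push_back("aicpu_kernel_info"); }},
    {"RT", [](std::vector<std::string> *out) { out->push_back("rt_kernel_info"); }},
  };

  std::vector<std::string> kernel_info_list;
  for (const auto &p : providers) {
    p.second(&kernel_info_list);
    if (!kernel_info_list.empty()) {
      std::printf("resolved by %s provider\n", p.first.c_str());
      break;  // First provider with candidates wins.
    }
  }
  return kernel_info_list.empty() ? 1 : 0;
}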
-
const std::vector<size_t> &HcclKernel::GetInputSizeList() const {
  size_t size = 0;
  if (!input_size_list_.empty()) {
    return input_size_list_;
  }
  for (ulong i = 0; i < hccl_data_type_list_.size(); ++i) {
    if (!HcomUtil::GetHcclOpSize(hccl_data_type_list_[i], hccl_kernel_input_shape_list_[i], &size)) {
      MS_LOG(ERROR) << "GetHcclOpInputSize failed";
    }
    input_size_list_.push_back(size);
  }
  return input_size_list_;
}

const std::vector<size_t> &HcclKernel::GetOutputSizeList() const {
  auto anf_node = anf_node_.lock();
  if (!anf_node) {
    MS_LOG(EXCEPTION) << "anf_node pointer is expired.";
  }
  size_t size = 0;
  if (!output_size_list_.empty()) {
    return output_size_list_;
  }
  auto cnode = anf_node->cast<CNodePtr>();
  auto op_name = AnfAlgo::GetCNodeName(cnode);
  int64_t rank_size = 1;
  if (AnfAlgo::HasNodeAttr(kAttrRankSize, cnode)) {
    rank_size = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrRankSize);
  }
  int64_t fusion = 0;
  if (AnfAlgo::HasNodeAttr(kAttrFusion, cnode)) {
    fusion = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrFusion);
  }
  // Fused AllGather/ReduceScatter nodes have more outputs than data types.
  ulong loop_size = hccl_data_type_list_.size();
  if (AnfAlgo::GetInputTensorNum(anf_node) > 1 && op_name == kAllGatherOpName && fusion >= 1) {
    loop_size *= rank_size;
  }
  if (op_name == kReduceScatterOpName && fusion >= 1) {
    loop_size = AnfAlgo::GetOutputTensorNum(anf_node);
  }
  for (ulong i = 0; i < loop_size; ++i) {
    if (!HcomUtil::GetHcclOpSize(hccl_data_type_list_[0], hccl_kernel_output_shape_list_[i], &size)) {
      MS_LOG(ERROR) << "GetHcclOpOutputSize failed";
    }
    output_size_list_.push_back(size);
  }
  return output_size_list_;
}

const std::vector<size_t> &HcclKernel::GetWorkspaceSizeList() const {
  if (!workspace_size_list_.empty() || hccl_data_type_list_.empty()) {
    return workspace_size_list_;
  }
  workspace_size_list_.emplace_back(hccl::CalcWorkspaceSize(anf_node_.lock(), hccl_data_type_list_[0]));
  return workspace_size_list_;
}

std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inputs,
                                             const std::vector<AddressPtr> &workspace,
                                             const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
  auto anf_node = anf_node_.lock();
  if (!anf_node) {
    MS_LOG(EXCEPTION) << "anf_node pointer is expired.";
  }
  std::string hccl_type = AnfAlgo::GetCNodeName(anf_node);
  // Receive has no inputs; every other op needs both inputs and outputs.
  if (hccl_type == kReceive) {
    if (outputs.empty()) {
      MS_LOG(EXCEPTION) << "Outputs is empty";
    }
  } else if (inputs.empty() || outputs.empty()) {
    MS_LOG(EXCEPTION) << "Inputs or outputs is empty";
  }
  stream_id_ = stream_id;
  void *input_data_addr = nullptr;
  if (hccl_type != kReceive) {
    MS_EXCEPTION_IF_NULL(inputs.at(0));
    input_data_addr = inputs.at(0)->addr;
  }
  MS_EXCEPTION_IF_NULL(outputs.at(0));
  auto output_data_addr = outputs.at(0)->addr;
  std::vector<uint8_t> private_def;
  HcclDataType data_type = hccl_data_type_list_[0];
  std::vector<hccl::HcclTaskInfo> task_info;
  bool ret = hccl::GenTask(anf_node, data_type, &task_info);
  if (!ret) {
    MS_LOG(EXCEPTION) << "Gen Task for " << anf_node->DebugString() << " failed.";
  }
  std::vector<TaskInfoPtr> results;
  for (auto &task : task_info) {
    MS_LOG(INFO) << "HCCL Task : stream_id=" << stream_id << ", count=" << hccl_count_
                 << ", root_id=" << root_id_ << ", op_type=" << static_cast<int>(op_type_)
                 << ", data_type=" << static_cast<int>(data_type) << ", workspace_size=" << task.workspace_size
                 << ", stream_num=" << task.stream_num << ", private_def_size=" << task.private_def.size();
    private_def.resize(task.private_def.size());
    auto sec_ret = memcpy_s(private_def.data(), private_def.size(), task.private_def.data(),
                            task.private_def.size());
    if (sec_ret != 0) {
      MS_LOG(EXCEPTION) << "Set data memcpy_s failed, ret = " << sec_ret;
    }
    void *workspace_addr = nullptr;
    if (task.workspace_size != 0) {
      if (workspace.empty()) {
        MS_LOG(EXCEPTION) << "Workspace size list of " << anf_node->DebugString() << " is empty";
      }
      MS_EXCEPTION_IF_NULL(workspace.at(0));
      workspace_addr = workspace.at(0)->addr;
    }
    results.emplace_back(std::make_shared<HcclTaskInfo>(
      kernel_name_, stream_id, hccl::GetHcclType(anf_node), input_data_addr, output_data_addr, workspace_addr,
      task.workspace_size, task.stream_num, private_def, hccl::GetHcclOpsKernelInfoStore(), hccl_count_,
      root_id_, op_type_, data_type, group_, NeedDump()));
  }
  return results;
}

device::DynamicKernelPtr HcclKernel::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
  AddressPtrList inputs;
  AddressPtrList workspaces;
  AddressPtrList outputs;
  device::KernelRuntime::GenLaunchArgs(*this, cnode_ptr, &inputs, &workspaces, &outputs);
  std::string hccl_type = MsOpNameToHcomOpType(AnfAlgo::GetCNodeName(anf_node_.lock()));
  if (inputs.empty()) {
    MS_LOG(EXCEPTION) << "Hccl kernel input is empty";
  }
  if (hccl_data_type_list_.empty()) {
    MS_LOG(EXCEPTION) << "Hccl data type list is empty";
  }
  MS_EXCEPTION_IF_NULL(inputs.at(0));
  auto input_data_addr = inputs.at(0)->addr;
  MS_EXCEPTION_IF_NULL(outputs.at(0));
  auto output_data_addr = outputs.at(0)->addr;
  HcclDataType data_type = hccl_data_type_list_[0];
  auto executor = std::make_shared<device::ascend::HcclDynamicKernel>(
    hccl_type, input_data_addr, output_data_addr, hccl_count_, data_type, op_type_, root_id_, stream_ptr,
    cnode_ptr);
  return executor;
}
}  // namespace kernel
}  // namespace mindspore
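The three Get*SizeList accessors share one pattern: compute the size list on first call, then return the cached vector on every subsequent call. A hypothetical, self-contained sketch of that pattern (class, sizes, and the mutable-member choice are assumptions for illustration, not the real HcclKernel declaration):

// cached_size_list_sketch.cc -- hypothetical compute-once, cache-forever
// accessor, as used by GetInputSizeList/GetOutputSizeList above.
#include <cstdio>
#include <vector>

class SizedKernel {
 public:
  const std::vector<size_t> &GetInputSizeList() const {
    if (!input_size_list_.empty()) {
      return input_size_list_;  // Already computed; reuse the cached result.
    }
    for (size_t i = 0; i < 4; ++i) {
      input_size_list_.push_back((i + 1) * 64);  // Stand-in for GetHcclOpSize.
    }
    return input_size_list_;
  }

 private:
  // mutable lets a const accessor fill the cache lazily on first use.
  mutable std::vector<size_t> input_size_list_;
};

int main() {
  SizedKernel k;
  std::printf("first call: %zu entries\n", k.GetInputSizeList().size());
  std::printf("second call: %zu entries (cached)\n", k.GetInputSizeList().size());
  return 0;
}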
-
#include "backend/kernel_compiler/hccl/hccl_kernel.h"

#include <map>

#include "backend/session/anf_runtime_algorithm.h"
#include "utils/utils.h"
#include "utils/ms_context.h"
#include "runtime/device/kernel_runtime.h"
#include "runtime/device/ascend/executor/hccl_dynamic_kernel.h"
#include "runtime/hccl_adapter/hccl_adapter.h"

// The using-declarations bring these names into the current scope so they can
// be referred to without full qualification.
using HcclTaskInfoPtr = std::shared_ptr<mindspore::ge::model_runner::HcclTaskInfo>;
using mindspore::ge::model_runner::HcclTaskInfo;

namespace {
// Map from MindSpore op names to HCCL communication op types.
static std::map<std::string, std::string> kMsOpNameToHcomHcclType = {
  {mindspore::kAllReduceOpName, mindspore::kHcomOpTypeAllReduce},
  {mindspore::kAllGatherOpName, mindspore::kHcomOpTypeAllGather},
  {mindspore::kBroadcastOpName, mindspore::kHcomOpTypeBroadcast},
  {mindspore::kHcomSendOpName, mindspore::kHcomOpTypeSend},
  {mindspore::kReceiveOpName, mindspore::kHcomOpTypeReceive},
  {mindspore::kReduceScatterOpName, mindspore::kHcomOpTypeReduceScatter}};

std::string MsOpNameToHcomOpType(const std::string &ms_op_type) {
  auto iter = kMsOpNameToHcomHcclType.find(ms_op_type);
  if (iter == kMsOpNameToHcomHcclType.end()) {
    MS_LOG(EXCEPTION) << "Invalid MsOpType:" << ms_op_type;
  }
  return iter->second;
}
}  // namespace

namespace mindspore {
namespace kernel {
void HcclKernelFactory::Register(const std::string &name, HcclKernelCreater &&fun) {
  hcclKernelMap_.emplace(name, std::move(fun));
}

std::shared_ptr<HcclKernel> HcclKernelFactory::Get(const std::string &name) {
  const auto &map = Get().hcclKernelMap_;
  auto it = map.find(name);
  if (it != map.end() && it->second) {
    return (it->second)();
  }
  return nullptr;
}

HcclKernelFactory &HcclKernelFactory::Get() {
  static HcclKernelFactory _this;
  return _this;
}

HcclKernel::HcclKernel() : hccl_count_(0), op_type_(HCCL_REDUCE_SUM), root_id_(0), receive_type_(0) {}

HcclKernel::~HcclKernel() {
  hccl_kernel_input_shape_list_.clear();
  hccl_kernel_output_shape_list_.clear();
  hccl_data_type_list_.clear();
  hccl_count_ = 0;
  op_type_ = HCCL_REDUCE_SUM;
  root_id_ = 0;
  input_size_list_.clear();
  output_size_list_.clear();
  workspace_size_list_.clear();
}

bool HcclKernel::Init(const AnfNodePtr &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);
  op_name_ = AnfAlgo::GetCNodeName(anf_node);
  if (op_name_ == kReceive) {
    if (!HcomUtil::GetHcomReceiveType(anf_node, &receive_type_)) {
      MS_LOG(ERROR) << "GetHcomReceiveType fail!";
      return false;
    }
  }
  if (!HcomUtil::GetKernelInputShape(anf_node, &hccl_kernel_input_shape_list_)) {
    MS_LOG(ERROR) << "GetKernelInputShape fail!";
    return false;
  }
  if (!HcomUtil::GetKernelOutputShape(anf_node, &hccl_kernel_output_shape_list_)) {
    MS_LOG(ERROR) << "GetKernelOutputShape fail!";
    return false;
  }
  if (op_name_ == kReceive) {
    // Receive derives its data type from the receive_type_ attribute.
    auto iter = CONST_OP_HCOM_DATA_TYPE_MAP.find(receive_type_);
    if (iter == CONST_OP_HCOM_DATA_TYPE_MAP.end()) {
      MS_LOG(ERROR) << "HcomDataType cannot support Current Ascend Data Type : " << receive_type_;
      return false;
    }
    hccl_data_type_list_.emplace_back(iter->second);
  } else if (!HcomUtil::GetHcomDataType(anf_node, &hccl_data_type_list_)) {
    MS_LOG(ERROR) << "GetHcomDataType fail!";
    return false;
  }
  // Receive counts elements from its output shape; other ops from their input shape.
  if (op_name_ == kReceive) {
    if (!HcomUtil::GetHcomCount(anf_node, hccl_data_type_list_, hccl_kernel_output_shape_list_, &hccl_count_)) {
      MS_LOG(ERROR) << "GetHcomCount fail!";
      return false;
    }
  } else {
    if (!HcomUtil::GetHcomCount(anf_node, hccl_data_type_list_, hccl_kernel_input_shape_list_, &hccl_count_)) {
      MS_LOG(ERROR) << "GetHcomCount fail!";
      return false;
    }
  }
  if (op_name_ == kAllReduce || op_name_ == kReduceScatter) {
    if (!HcomUtil::GetHcomOperationType(anf_node, &op_type_)) {
      MS_LOG(ERROR) << "GetHcomOperationType fail!";
      return false;
    }
  }
  if (op_name_ == kBroadcast) {
    if (!HcomUtil::GetHcomRootId(anf_node, &root_id_)) {
      MS_LOG(ERROR) << "GetHcomRootId fail!";
      return false;
    }
  }
  HcomUtil::GetHcomGroup(NOT_NULL(anf_node), NOT_NULL(&group_));
  anf_node_ = anf_node;
  return true;
}
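HcclKernelFactory combines two standard idioms: a Meyers singleton for the registry instance and a name-to-creator map for construction by string. A hypothetical, self-contained reduction of that design (all types here are stand-ins, not the real HcclKernel hierarchy):

// factory_sketch.cc -- hypothetical reduction of the HcclKernelFactory
// pattern: a name -> creator map behind a function-local static singleton.
#include <cstdio>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>

struct Kernel {
  virtual ~Kernel() = default;
  virtual const char *Name() const = 0;
};

class KernelFactory {
 public:
  using Creator = std::function<std::shared_ptr<Kernel>()>;
  static KernelFactory &Get() {
    static KernelFactory instance;  // Constructed once, on first use, thread-safe in C++11.
    return instance;
  }
  void Register(const std::string &name, Creator creator) { creators_.emplace(name, std::move(creator)); }
  std::shared_ptr<Kernel> Create(const std::string &name) {
    auto it = creators_.find(name);
    return (it != creators_.end() && it->second) ? it->second() : nullptr;
  }

 private:
  std::map<std::string, Creator> creators_;
};

struct AllReduceKernel : Kernel {
  const char *Name() const override { return "AllReduce"; }
};

int main() {
  KernelFactory::Get().Register("AllReduce", [] { return std::make_shared<AllReduceKernel>(); });
  auto k = KernelFactory::Get().Create("AllReduce");
  std::printf("created: %s\n", k ? k->Name() : "(null)");
  return 0;
}

Returning nullptr for unknown names, as the real Get() does, lets callers fall back gracefully instead of throwing at registry-lookup time.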
-
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h"
#include <string>
#include <algorithm>
#include "utils/ms_utils.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
  std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
  std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
  // Both src and weight must be 4-D (NCHW).
  if (src_shape.size() != 4 || weight_shape.size() != 4) {
    MS_LOG(EXCEPTION) << "Conv2d only supports NCHW input!";
  }
  std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
  size_t group = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, GROUP));
  if (group != 1) {
    if (src_shape[1] % group != 0) {
      MS_LOG(EXCEPTION) << "Conv2d channels should be divisible by group!";
    }
    // Grouped convolution: reshape weights from (O, I, H, W) to (G, O/G, I, H, W).
    weight_shape.insert(weight_shape.begin(), group);
    weight_shape[1] = weight_shape[1] / group;
  }
  dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
  dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
  dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
  std::vector<int> stride_ori;
  std::vector<int> dilation_ori;
  auto stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDE);
  auto dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, DILATION);
  (void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori),
                       [](const int64_t &value) { return static_cast<int>(value); });
  (void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori),
                       [](const int64_t &value) { return static_cast<int>(value); });
  if (stride_ori[0] != 1 || stride_ori[1] != 1) {
    MS_LOG(EXCEPTION) << "Conv2d stride only supports 1 in N axis and C axis!";
  }
  if (dilation_ori.size() != 4) {
    MS_LOG(EXCEPTION) << "Conv2d dilation must be 4-D!";
  }
  if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
    MS_LOG(EXCEPTION) << "Conv2d dilation only supports 1 in N axis and C axis!";
  }
  std::vector<int> stride{stride_ori[2], stride_ori[3]};
  std::vector<int> dilation{dilation_ori[2], dilation_ori[3]};
  dnnl::memory::dims strides{stride_ori[2], stride_ori[3]};
  // oneDNN counts dilation from 0 while the framework counts from 1; hence the -1.
  dnnl::memory::dims dilates{dilation_ori[2] - 1, dilation_ori[3] - 1};
  std::vector<int> int_padding_l;
  std::vector<int> int_padding_r;
  const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
  GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r, dilation);
  if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
    MS_LOG(EXCEPTION) << "Get padding failed";
  }
  dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]};
  dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]};
  dnnl::convolution_forward::desc desc =
    dnnl::convolution_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_auto,
                                    src_desc, weights_desc, dst_desc, strides, dilates, padding_l, padding_r);
  auto prim_desc = dnnl::convolution_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
  primitive_ = std::make_shared<dnnl::convolution_forward>(prim_desc);
  AddArgument(DNNL_ARG_SRC, src_desc);
  AddArgument(DNNL_ARG_WEIGHTS, weights_desc);
  AddArgument(DNNL_ARG_DST, dst_desc);
}

bool Conv2dCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                             const std::vector<kernel::AddressPtr> & /*workspace*/,
                             const std::vector<kernel::AddressPtr> &outputs) {
  if (inputs.size() < 2 || outputs.empty()) {
    MS_LOG(EXCEPTION) << "Error input output size!";
  }
  SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
  SetArgumentHandle(DNNL_ARG_WEIGHTS, inputs[1]->addr);
  SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
  ExecutePrimitive();
  return true;
}
}  // namespace kernel
}  // namespace mindspore
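A hypothetical check of the off-by-one dilation convention noted above: the framework stores dilation d, while the `dilates` dims handed to oneDNN carry d - 1. Both conventions describe the same effective kernel extent:

// dilation_convention_sketch.cc -- hypothetical demo; values are made up.
#include <cstdio>

int main() {
  int d_ms = 2;             // Framework-style dilation attribute.
  int d_onednn = d_ms - 1;  // What InitKernel puts into dnnl::memory::dims dilates.
  int kernel = 3;
  // Effective kernel extent is identical under both conventions:
  int extent_ms = (kernel - 1) * d_ms + 1;                // framework-style formula
  int extent_onednn = (kernel - 1) * (d_onednn + 1) + 1;  // oneDNN-style formula
  std::printf("extent: %d vs %d\n", extent_ms, extent_onednn);  // 5 vs 5
  return 0;
}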
-
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "backend/kernel_compiler/cpu/mkldnn/batch_norm_gard_cpu_kernel.h"
#include <string>
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"

namespace mindspore {
namespace kernel {
void BatchNormGradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
  MS_EXCEPTION_IF_NULL(kernel_node);
  size_t type_size = sizeof(float);
  std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
  size_t tensor_size = shape[1] * 2 * type_size;  // [2, C]: one row of scale, one of bias.
  input_size_list_.pop_back();
  // [2, C] workspace to store the packed scale and bias.
  workspace_size_list_.emplace_back(tensor_size);
  // [2, C] workspace to store diff_scale and diff_bias.
  workspace_size_list_.emplace_back(tensor_size);
}

void BatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
  if (x_shape.size() != 4) {
    MS_LOG(EXCEPTION) << "Fused batchnorm only supports NCHW input!";
  }
  batch_size = x_shape[0];
  channel = x_shape[1];
  hw_size = x_shape[2] * x_shape[3];
  nhw_size = x_shape[0] * hw_size;
  dnnl::memory::desc x_desc = GetDefaultMemDesc(x_shape);
  dnnl::memory::desc scale_bias_desc = GetDefaultMemDesc({2, channel});
  auto epsilon = AnfAlgo::GetNodeAttr<float>(kernel_node, "epsilon");
  auto prop_kind = dnnl::prop_kind::forward_training;
  auto normalization_flags = dnnl::normalization_flags::use_scale_shift;
  // The forward descriptor is needed to derive the mean/variance/workspace
  // descriptors for the backward primitive.
  dnnl::batch_normalization_forward::desc desc =
    dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags);
  auto forward_prim_desc =
    dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
  // Backward (gradient) descriptor.
  dnnl::batch_normalization_backward::desc backward_desc =
    dnnl::batch_normalization_backward::desc(dnnl::prop_kind::backward, x_desc, x_desc, epsilon,
                                             normalization_flags);
  auto backward_prim_desc = dnnl::batch_normalization_backward::primitive_desc(
    backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc);
  primitive_ = std::make_shared<dnnl::batch_normalization_backward>(backward_prim_desc);
  AddArgument(DNNL_ARG_SRC, x_desc);
  AddArgument(DNNL_ARG_MEAN, forward_prim_desc.mean_desc());
  AddArgument(DNNL_ARG_VARIANCE, forward_prim_desc.variance_desc());
  AddArgument(DNNL_ARG_SCALE_SHIFT, scale_bias_desc);
  AddArgument(DNNL_ARG_WORKSPACE, forward_prim_desc.workspace_desc());
  AddArgument(DNNL_ARG_DST, x_desc);
  AddArgument(DNNL_ARG_DIFF_DST, x_desc);
  AddArgument(DNNL_ARG_DIFF_SRC, x_desc);
  AddArgument(DNNL_ARG_DIFF_SCALE_SHIFT, scale_bias_desc);
}

bool BatchNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                    const std::vector<kernel::AddressPtr> &workspace,
                                    const std::vector<kernel::AddressPtr> &outputs) {
  if (inputs.size() < 5 || outputs.empty()) {
    MS_LOG(EXCEPTION) << "Error input output size!";
  }
  // Pack scale into the first row of the [2, C] workspace and zero the bias row.
  auto wksp_in = reinterpret_cast<float *>(workspace[0]->addr);
  auto scale_ret = memcpy_s(wksp_in, workspace[0]->size, inputs[2]->addr, inputs[2]->size);
  auto max_size = workspace[0]->size - inputs[2]->size;
  auto bias_ret = memset_s(wksp_in + (inputs[2]->size / sizeof(float)), max_size, 0, max_size);
  if (scale_ret != 0 || bias_ret != 0) {
    MS_LOG(EXCEPTION) << "Memcpy_s error.";
    return false;
  }
  SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr);
  SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr);
  SetArgumentHandle(DNNL_ARG_MEAN, inputs[3]->addr);
  SetArgumentHandle(DNNL_ARG_VARIANCE, inputs[4]->addr);
  SetArgumentHandle(DNNL_ARG_SCALE_SHIFT, workspace[0]->addr);
  SetArgumentHandle(DNNL_ARG_DIFF_SRC, outputs[0]->addr);
  SetArgumentHandle(DNNL_ARG_DIFF_SCALE_SHIFT, workspace[1]->addr);
  ExecutePrimitive();
  // Unpack diff_scale and diff_bias from the second [2, C] workspace.
  auto wksp_out = reinterpret_cast<float *>(workspace[1]->addr);
  auto diff_scale_ret = memcpy_s(outputs[1]->addr, outputs[1]->size, wksp_out, inputs[2]->size);
  auto diff_bias_ret = memcpy_s(outputs[2]->addr, outputs[2]->size,
                                wksp_out + (outputs[1]->size / sizeof(float)), outputs[2]->size);
  if (diff_scale_ret != 0 || diff_bias_ret != 0) {
    MS_LOG(EXCEPTION) << "Memcpy_s error.";
    return false;
  }
  return true;
}
}  // namespace kernel
}  // namespace mindspore
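A hypothetical demo of the [2, C] scale/shift layout that Launch packs for oneDNN: row 0 holds scale, row 1 holds shift (bias), here zeroed because the gradient kernel supplies no bias. Standard memcpy/memset stand in for the memcpy_s/memset_s calls above:

// scale_shift_packing_sketch.cc -- hypothetical demo with made-up values.
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
  const size_t channel = 4;
  std::vector<float> scale = {1.f, 2.f, 3.f, 4.f};
  std::vector<float> scale_shift(2 * channel);  // Stand-in for workspace[0].

  // Copy scale into the first row, zero the second (bias) row.
  std::memcpy(scale_shift.data(), scale.data(), channel * sizeof(float));
  std::memset(scale_shift.data() + channel, 0, channel * sizeof(float));

  for (size_t i = 0; i < scale_shift.size(); ++i) {
    std::printf("%s%.1f", i == 0 ? "" : " ", scale_shift[i]);
  }
  std::printf("\n");  // 1.0 2.0 3.0 4.0 0.0 0.0 0.0 0.0
  return 0;
}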
-
// Fill the proto's output tensors with the node's output shapes and types.
void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(proto);
  MS_EXCEPTION_IF_NULL(anf_node);
  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
  if (output_num == 1 && HasAbstractMonad(anf_node)) {
    output_num = 0;
  }
  if (output_num == 0) {
    MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have output. ";
    return;
  }
  for (size_t output_index = 0; output_index < output_num; output_index++) {
    ::mindspore::Tensor *node_outputs = proto->add_outputs();
    MS_EXCEPTION_IF_NULL(node_outputs);
    std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
    mindspore::TensorShape *tensorShape = node_outputs->mutable_tensor_shape();
    MS_EXCEPTION_IF_NULL(tensorShape);
    for (auto item : output_shape) {
      mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
      MS_EXCEPTION_IF_NULL(dim);
      dim->set_size((::google::protobuf::int64)item);
    }
    TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
    int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
    node_outputs->set_tensor_type(output_data_type);
    node_outputs->set_mem_device("HBM");
  }
}

void SetNodedefProto(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(proto);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  // Set op name.
  proto->set_op(op_name);
  // Set input tensors.
  SetNodeInputs(anf_node, proto);
  // Set output tensors.
  SetNodeOutputs(anf_node, proto);
  // Set node attributes.
  SetNodeAttr(anf_node, proto);
}

// Serialize the NodeDef proto and attach it to the kernel module.
bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
                        const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  MS_EXCEPTION_IF_NULL(anf_node);
  mindspore::NodeDef proto;
  SetNodedefProto(anf_node, &proto);
  std::string nodeDefStr;
  if (!proto.SerializeToString(&nodeDefStr)) {
    MS_LOG(ERROR) << "Serialize nodeDef to string failed.";
    return false;
  }
  kernel_mod_ptr->SetNodeDef(nodeDefStr);
  return true;
}

uint64_t SetExtInfoShapeType(char *ext_info_buf, uint64_t ext_info_offset, UnknowShapeOpType type) {
  // Part 1: unknown shape type.
  auto *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
  info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
  info->infoLen = sizeof(int32_t);
  ext_info_offset += kExtInfoHeadSize;
  auto *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
  *shape_type = type;
  ext_info_offset += info->infoLen;
  return ext_info_offset;
}

uint64_t SetExtInfoInputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
                                  const std::shared_ptr<AnfNode> &anf_node, size_t input_num) {
  // Part 2: input shapes and types.
  auto *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
  info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
  info->infoLen = input_num * sizeof(ShapeAndType);
  ext_info_offset += kExtInfoHeadSize;
  auto *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
  for (size_t input_index = 0; input_index < input_num; input_index++) {
    TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
    std::vector<size_t> input_shape;
    int32_t input_data_type;
    if (input_type == kObjectTypeString) {
      // String inputs are encoded as a (1, byte_length) shape with unknown type.
      auto cnode = anf_node->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(cnode);
      auto input_node = cnode->inputs()[input_index + 1];
      auto value_ptr = GetValueNode(input_node);
      auto value = GetValue<std::string>(value_ptr);
      input_shape.push_back(1);
      input_shape.push_back(value.size());
      input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
    } else {
      input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
      input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
    }
    inputs[input_index].type = input_data_type;
    size_t input_shape_index = 0;
    for (; input_shape_index < input_shape.size(); input_shape_index++) {
      inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]);
    }
    if (input_shape.size() < kMaxShapeDims) {
      // Terminate the dims array with a sentinel when the shape is short.
      inputs[input_index].dims[input_shape_index] = LLONG_MIN;
    }
  }
  ext_info_offset += info->infoLen;
  return ext_info_offset;
}

uint64_t SetExtInfoOutputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
                                   const std::shared_ptr<AnfNode> &anf_node, size_t output_num) {
  // Part 3: output shapes and types.
  auto *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
  info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
  info->infoLen = output_num * sizeof(ShapeAndType);
  ext_info_offset += kExtInfoHeadSize;
  auto *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
  for (size_t output_index = 0; output_index < output_num; output_index++) {
    std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
    TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
    int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
    outputs[output_index].type = output_data_type;
    size_t output_shape_index = 0;
    for (; output_shape_index < output_shape.size(); output_shape_index++) {
      outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]);
    }
    if (output_shape_index < kMaxShapeDims) {
      outputs[output_index].dims[output_shape_index] = LLONG_MIN;
    }
  }
  ext_info_offset += info->infoLen;
  return ext_info_offset;
}

// Build the dynamic-shape ext info buffer and attach it to the kernel module.
bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node,
                   const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  if (!anf_node->isa<CNode>()) {
    return true;
  }
  if (!AnfAlgo::IsDynamicShape(anf_node)) {
    return true;
  }
  uint64_t ext_info_head_len = kExtInfoHeadSize;
  std::string ext_info;
  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
  // 1. unknown-shape-type block
  uint64_t ext_info_len = ext_info.size();
  ext_info_len += ext_info_head_len + sizeof(int32_t);
  // 2. input shapes block
  ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
  // 3. output shapes block
  ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
  uint64_t ext_info_offset = ext_info.size();
  ext_info.resize(ext_info_len, 0);
  char *ext_info_buf = ext_info.data();
  UnknowShapeOpType shape_type = UnknowShapeOpType::DEPEND_IN_SHAPE;
  auto op_name = AnfAlgo::GetCNodeName(anf_node);
  if (kComputeDepend.find(op_name) != kComputeDepend.end()) {
    shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
  }
  ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset, shape_type);
  ext_info_offset = SetExtInfoInputShapeType(ext_info_buf, ext_info_offset, anf_node, input_num);
  ext_info_offset = SetExtInfoOutputShapeType(ext_info_buf, ext_info_offset, anf_node, output_num);
  MS_LOG(INFO) << "Check ext_info_len:" << ext_info_len << " ext_info_offset:" << ext_info_offset;
  kernel_mod_ptr->SetExtInfo(ext_info);
  return true;
}

KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  auto kernel_mod_ptr = std::make_shared<AicpuOpKernelMod>();
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  kernel_mod_ptr->SetAnfNode(anf_node);
  kernel_mod_ptr->SetNodeName(op_name);
  if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
    MS_LOG(EXCEPTION) << "Create nodeDefBytes failed!";
  }
  if (!CreateExtInfo(anf_node, kernel_mod_ptr)) {
    MS_LOG(EXCEPTION) << "Create extInfo failed!";
  }
  if (!SetIOSize(anf_node, kernel_mod_ptr)) {
    MS_LOG(EXCEPTION) << "Set input output size list failed.";
  }
  return kernel_mod_ptr;
}
}  // namespace kernel
}  // namespace mindspore
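CreateExtInfo sizes its buffer as a sequence of TLV-style blocks: a fixed header (type, length) followed by a payload, three blocks in total. A hypothetical sketch of that length computation (the header struct and the per-tensor payload size are assumptions, not the real ExtInfo/ShapeAndType definitions):

// ext_info_layout_sketch.cc -- hypothetical demo of the three-block buffer
// length computed in CreateExtInfo.
#include <cstdint>
#include <cstdio>

struct ExtInfoHead {  // Stand-in for the real ExtInfo header.
  int32_t infoType;
  uint32_t infoLen;
};

int main() {
  const uint64_t head = sizeof(ExtInfoHead);
  uint64_t input_num = 2, output_num = 1;
  const uint64_t shape_and_type = 4 + 8 * 8;  // Assumed payload size per tensor.

  uint64_t len = 0;
  len += head + sizeof(int32_t);              // 1. unknown-shape-type block
  len += head + input_num * shape_and_type;   // 2. input shapes block
  len += head + output_num * shape_and_type;  // 3. output shapes block
  std::printf("ext info buffer: %llu bytes\n", static_cast<unsigned long long>(len));
  return 0;
}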
-
```c++
void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {  // set node outputs
  MS_EXCEPTION_IF_NULL(proto);
  MS_EXCEPTION_IF_NULL(anf_node);
  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
  if (output_num == 1 && HasAbstractMonad(anf_node)) {  // a single monad output counts as no real output
    output_num = 0;
  }
  if (output_num == 0) {
    MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have output. ";
    return;
  }
  for (size_t output_index = 0; output_index < output_num; output_index++) {  // iterate over all outputs
    ::mindspore::Tensor *node_outputs = proto->add_outputs();
    MS_EXCEPTION_IF_NULL(node_outputs);
    std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
    mindspore::TensorShape *tensorShape = node_outputs->mutable_tensor_shape();
    MS_EXCEPTION_IF_NULL(tensorShape);
    for (auto item : output_shape) {  // copy each dim of the output shape into the proto
      mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
      MS_EXCEPTION_IF_NULL(dim);
      dim->set_size((::google::protobuf::int64)item);
    }
    TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
    int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
    node_outputs->set_tensor_type(output_data_type);
    node_outputs->set_mem_device("HBM");
  }
}

void SetNodedefProto(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(proto);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  // set the op name
  proto->set_op(op_name);
  // set the input tensors
  SetNodeInputs(anf_node, proto);
  // set the output tensors
  SetNodeOutputs(anf_node, proto);
  // set the node attrs
  SetNodeAttr(anf_node, proto);
}

// serialize the NodeDef proto for this node
bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
                        const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  MS_EXCEPTION_IF_NULL(anf_node);
  mindspore::NodeDef proto;
  SetNodedefProto(anf_node, &proto);
  std::string nodeDefStr;
  if (!proto.SerializeToString(&nodeDefStr)) {
    MS_LOG(ERROR) << "Serialize nodeDef to string failed.";
    return false;
  }
  kernel_mod_ptr->SetNodeDef(nodeDefStr);
  return true;
}

uint64_t SetExtInfoShapeType(char *ext_info_buf, uint64_t ext_info_offset, UnknowShapeOpType type) {
  // record 1: unknown shape type
  auto *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
  info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
  info->infoLen = sizeof(int32_t);
  ext_info_offset += kExtInfoHeadSize;
  auto *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
  *shape_type = type;
  ext_info_offset += info->infoLen;
  return ext_info_offset;
}

uint64_t SetExtInfoInputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
                                  const std::shared_ptr<AnfNode> &anf_node, size_t input_num) {
  // record 2: input ShapeAndType
  auto *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
  info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
  info->infoLen = input_num * sizeof(ShapeAndType);
  ext_info_offset += kExtInfoHeadSize;
  auto *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
  for (size_t input_index = 0; input_index < input_num; input_index++) {  // record each input's shape and type
    TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
    std::vector<size_t> input_shape;
    int32_t input_data_type;
    if (input_type == kObjectTypeString) {  // string inputs are encoded as shape (1, length)
      auto cnode = anf_node->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(cnode);
      auto input_node = cnode->inputs()[input_index + 1];
      auto value_ptr = GetValueNode(input_node);
      auto value = GetValue<std::string>(value_ptr);
      input_shape.push_back(1);
      input_shape.push_back(value.size());
      input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
    } else {
      input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
      input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
    }
    inputs[input_index].type = input_data_type;
    size_t input_shape_index = 0;
    for (; input_shape_index < input_shape.size(); input_shape_index++) {
      inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]);
    }
    if (input_shape.size() < kMaxShapeDims) {
      inputs[input_index].dims[input_shape_index] = LLONG_MIN;  // sentinel marking the end of the dims
    }
  }
  ext_info_offset += info->infoLen;
  return ext_info_offset;
}

uint64_t SetExtInfoOutputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
                                   const std::shared_ptr<AnfNode> &anf_node, size_t output_num) {
  // record 3: output ShapeAndType
  auto *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
  info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
  info->infoLen = output_num * sizeof(ShapeAndType);
  ext_info_offset += kExtInfoHeadSize;
  auto *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
  for (size_t output_index = 0; output_index < output_num; output_index++) {
    std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
    TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
    int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
    outputs[output_index].type = output_data_type;
    size_t output_shape_index = 0;
    for (; output_shape_index < output_shape.size(); output_shape_index++) {
      outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]);
    }
    if (output_shape_index < kMaxShapeDims) {
      outputs[output_index].dims[output_shape_index] = LLONG_MIN;  // sentinel marking the end of the dims
    }
  }
  ext_info_offset += info->infoLen;
  return ext_info_offset;
}

// build the ext-info blob; only dynamic-shape CNodes need it
bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  if (!anf_node->isa<CNode>()) {
    return true;
  }
  if (!AnfAlgo::IsDynamicShape(anf_node)) {
    return true;
  }
  uint64_t ext_info_head_len = kExtInfoHeadSize;
  std::string ext_info;
  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
  // 1. unknown shape type
  uint64_t ext_info_len = ext_info.size();
  ext_info_len += ext_info_head_len + sizeof(int32_t);
  // 2. input ShapeAndType
  ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
  // 3. output ShapeAndType
  ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
  uint64_t ext_info_offset = ext_info.size();
  ext_info.resize(ext_info_len, 0);
  char *ext_info_buf = ext_info.data();
  UnknowShapeOpType shape_type = UnknowShapeOpType::DEPEND_IN_SHAPE;
  auto op_name = AnfAlgo::GetCNodeName(anf_node);
  if (kComputeDepend.find(op_name) != kComputeDepend.end()) {
    shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
  }
  ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset, shape_type);
  ext_info_offset = SetExtInfoInputShapeType(ext_info_buf, ext_info_offset, anf_node, input_num);
  ext_info_offset = SetExtInfoOutputShapeType(ext_info_buf, ext_info_offset, anf_node, output_num);
  MS_LOG(INFO) << "Check ext_info_len:" << ext_info_len << " ext_info_offset:" << ext_info_offset;
  // hand the ext info over to the kernel mod
  kernel_mod_ptr->SetExtInfo(ext_info);
  return true;
}

// entry point that builds an AICPU kernel mod for the node
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  auto kernel_mod_ptr = std::make_shared<AicpuOpKernelMod>();
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  kernel_mod_ptr->SetAnfNode(anf_node);
  kernel_mod_ptr->SetNodeName(op_name);
  if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
    MS_LOG(EXCEPTION) << "Create nodeDefBytes failed!";
  }
  if (!CreateExtInfo(anf_node, kernel_mod_ptr)) {  // ext-info creation failed
    MS_LOG(EXCEPTION) << "Create extInfo failed!";
  }
  if (!SetIOSize(anf_node, kernel_mod_ptr)) {  // input/output size list missing
    MS_LOG(EXCEPTION) << "Set input output size list failed.";
  }
  return kernel_mod_ptr;
}
}  // namespace kernel
}  // namespace mindspore
```
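To make the three-record layout that `CreateExtInfo` writes easier to follow, here is a minimal, self-contained sketch of the length computation. `ExtInfoHead` and `ShapeAndType` below are simplified stand-ins for the framework structs (the real `kExtInfoHeadSize` and dim capacity may differ), so treat this as an illustration of the buffer arithmetic only:

```c++
#include <cstddef>
#include <cstdint>
#include <iostream>

// simplified stand-ins for the framework's ext-info structs (assumed layout)
struct ExtInfoHead { int32_t infoType; int32_t infoLen; };
struct ShapeAndType { int32_t type; int64_t dims[8]; };

// total buffer size: one header per record plus each record's payload
uint64_t ExtInfoLen(size_t input_num, size_t output_num) {
  const uint64_t head = sizeof(ExtInfoHead);
  uint64_t len = 0;
  len += head + sizeof(int32_t);                    // record 1: unknown-shape type
  len += head + input_num * sizeof(ShapeAndType);   // record 2: input shapes/types
  len += head + output_num * sizeof(ShapeAndType);  // record 3: output shapes/types
  return len;
}

int main() {
  std::cout << ExtInfoLen(2, 1) << std::endl;  // e.g. a node with 2 inputs, 1 output
}
```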
-
```c++
/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// this file's own header
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
// system and third-party headers
#include <google/protobuf/text_format.h>
#include <utility>
#include <string>
#include <vector>
#include <memory>
#include <algorithm>
#include <map>
#include <climits>
// project headers
#include "utils/utils.h"
#include "runtime/device/kernel_runtime.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include "proto/tensor.pb.h"
#include "proto/tensor_shape.pb.h"
#include "proto/attr.pb.h"
#include "proto/node_def.pb.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "backend/session/kernel_graph.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/oplib/oplib.h"

namespace mindspore {
namespace kernel {
// alias for the attr-handler function type, imported into this scope for later use
using FNodeAttrHandle = std::function<void(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto)>;

// compute the byte size of every input tensor
bool SetIOIputSize(const std::shared_ptr<AnfNode> &anf_node, const size_t &input_num,
                   std::vector<size_t> *input_size_list) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(input_size_list);
  for (size_t i = 0; i < input_num; i++) {
    std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
    if (AnfAlgo::GetInputDeviceDataType(anf_node, i) == kObjectTypeString) {  // string input: size is the string length
      if (!anf_node->isa<CNode>()) {
        MS_LOG(EXCEPTION) << "anf_node is not CNode.";
      }
      auto cnode = anf_node->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(cnode);
      if (cnode->inputs().size() < (i + 1)) {
        MS_LOG(ERROR) << "cnode inputs size " << cnode->inputs().size() << " is smaller than " << i + 1;
        return false;
      }
      auto input_node = cnode->inputs()[i + 1];
      MS_EXCEPTION_IF_NULL(input_node);
      if (input_node->isa<ValueNode>()) {
        auto value_ptr = GetValueNode(input_node);
        auto value = GetValue<std::string>(value_ptr);
        input_size_list->push_back(value.size());
      }
    } else {  // numeric input: size is the product of the dims times the element width
      auto type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
      MS_EXCEPTION_IF_NULL(type_ptr);
      int64_t size_i = 1;
      for (size_t j = 0; j < shape_i.size(); j++) {
        size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
      }
      size_t type_byte = GetTypeByte(type_ptr);
      if (type_byte == 0) {  // unknown element width
        return false;
      }
      size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
      input_size_list->push_back(LongToSize(size_i));
    }
  }
  return true;
}

// fill the kernel mod's input and output size lists
bool SetIOSize(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  std::vector<size_t> input_size_list;
  std::vector<size_t> output_size_list;
  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
  if (!SetIOIputSize(anf_node, input_num, &input_size_list)) {
    return false;
  }
  kernel_mod_ptr->SetInputSizeList(input_size_list);
  if (output_num == 1 && HasAbstractMonad(anf_node)) {  // a single monad output counts as no real output
    output_num = 0;
  }
  for (size_t i = 0; i < output_num; i++) {
    std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
    TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
    MS_EXCEPTION_IF_NULL(type_ptr);
    int64_t size_i = 1;
    for (size_t j = 0; j < shape_i.size(); j++) {  // product of the output dims
      size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
    }
    size_t type_byte = GetTypeByte(type_ptr);
    if (type_byte == 0) {  // unknown element width
      return false;
    }
    size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
    output_size_list.push_back(LongToSize(size_i));
  }
  kernel_mod_ptr->SetOutputSizeList(output_size_list);
  return true;
}

// convert one primitive attr value into the proto attr map
void ParseAttrValue(const std::string &type, const std::string &attr_name, const mindspore::ValuePtr &value,
                    ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr) {
  MS_EXCEPTION_IF_NULL(node_attr);
  MS_EXCEPTION_IF_NULL(value);
  if (type == "int") {
    auto attr_value = static_cast<int>(GetValue<int64_t>(value));
    (*node_attr)[attr_name].set_i(attr_value);
  } else if (type == "str") {
    auto attr_value = GetValue<std::string>(value);
    (*node_attr)[attr_name].set_s(attr_value);
  } else if (type == "bool") {
    auto attr_value = GetValue<bool>(value);
    (*node_attr)[attr_name].set_b(attr_value);
  } else if (type == "float") {
    auto attr_value = GetValue<float>(value);
    (*node_attr)[attr_name].set_f(attr_value);
  } else if (type == "listInt") {
    std::vector<int64_t> attr_value;
    auto value_type = value->type();
    MS_EXCEPTION_IF_NULL(value_type);
    auto value_type_str = value_type->ToString();
    if (value_type_str == "Int64") {  // a single Int64 becomes a one-element list
      auto data = GetValue<int64_t>(value);
      attr_value.push_back(data);
    } else {
      attr_value = GetValue<std::vector<int64_t>>(value);
    }
    mindspore::AttrValue input_shape_attr;
    mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array();
    MS_EXCEPTION_IF_NULL(input_shape_attr_list);
    for (const auto shape : attr_value) {
      input_shape_attr_list->add_i(shape);
    }
    (*node_attr)[attr_name] = input_shape_attr;
  } else {  // none of the supported types matched
    MS_LOG(EXCEPTION) << "type: " << type << " not support";
  }
}

// copy the registered attrs of the op into the NodeDef proto
void SetNodeAttr(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(proto);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  if (op_name == kPrint) {
    return;
  }
  auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU);
  MS_EXCEPTION_IF_NULL(op_info_ptr);
  auto attrs_ptr = op_info_ptr->attrs_ptr();
  auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
  MS_EXCEPTION_IF_NULL(primitive);
  ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs();
  for (const auto &attr_ptr : attrs_ptr) {
    MS_EXCEPTION_IF_NULL(attr_ptr);
    std::string attr_name = attr_ptr->name();
    auto value = primitive->GetAttr(attr_name);
    if (value != nullptr) {
      // a few attr names are remapped before serialization
      if (attr_name == kQueueName || attr_name == kSharedName) {
        attr_name = kChannelName;
      } else if (attr_name == kSeed0) {
        attr_name = kSeed;
      } else if (attr_name == kSeed1) {
        attr_name = kSeed2;
      }
      std::string type = attr_ptr->type();
      ParseAttrValue(type, attr_name, value, node_attr);
    }
  }
}

// set node inputs
void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(proto);
  MS_EXCEPTION_IF_NULL(anf_node);
  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
  if (input_num == 0) {
    MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have input.";
    return;
  }
  for (size_t input_index = 0; input_index < input_num; input_index++) {  // describe each input tensor
    ::mindspore::Tensor *node_inputs = proto->add_inputs();
    MS_EXCEPTION_IF_NULL(node_inputs);
    TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
    std::vector<size_t> input_shape;
    int32_t input_data_type;
    if (input_type == kObjectTypeString) {  // string inputs are encoded as shape (1, length)
      auto cnode = anf_node->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(cnode);
      auto input_node = cnode->inputs()[input_index + 1];
      auto value_ptr = GetValueNode(input_node);
      auto value = GetValue<std::string>(value_ptr);
      input_shape.push_back(1);
      input_shape.push_back(value.size());
      input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
    } else {
      input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
      input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
    }
    mindspore::TensorShape *tensorShape = node_inputs->mutable_tensor_shape();
    for (auto item : input_shape) {  // copy each dim of the input shape into the proto
      mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
      dim->set_size((::google::protobuf::int64)item);
    }
    node_inputs->set_tensor_type(input_data_type);
    node_inputs->set_mem_device("HBM");
  }
}
```
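The size computation in `SetIOIputSize`/`SetIOSize` boils down to "product of all dims, times the element width, with overflow checks". A standalone sketch of that arithmetic, with a hypothetical `CheckedMul` standing in for the framework's `LongMulWithOverflowCheck`:

```c++
#include <cstdint>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <vector>

// hypothetical stand-in for LongMulWithOverflowCheck: multiply or throw
int64_t CheckedMul(int64_t a, int64_t b) {
  if (a != 0 && b > std::numeric_limits<int64_t>::max() / a) {
    throw std::overflow_error("size overflow");
  }
  return a * b;
}

// byte size of a tensor: element count times element width
size_t TensorByteSize(const std::vector<size_t> &shape, size_t type_byte) {
  int64_t size = 1;
  for (size_t dim : shape) {
    size = CheckedMul(size, static_cast<int64_t>(dim));
  }
  return static_cast<size_t>(CheckedMul(size, static_cast<int64_t>(type_byte)));
}

int main() {
  std::cout << TensorByteSize({2, 3, 4}, sizeof(float)) << std::endl;  // 96 bytes
}
```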
-
** "mindspore\lite\src\lite_kernel_util.cc"注释1** ======================================= ```python /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "src/lite_kernel_util.h" #include #include #include "src/sub_graph_kernel.h" //命名空间内核 namespace mindspore::kernel { using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; //节点图输入节点 std::vector LiteKernelUtil::SubgraphInputNodes(const std::vector &kernels) { std::vector input_nodes;//创建一个装输入节点的容器 for (const auto &kernel : kernels) { // 如果内核没有预内核,内核是图输入,它必须是子图输入 if (kernel->in_kernels().empty() && !kernel->in_tensors().empty()) { if (!lite::IsContain(input_nodes, kernel)) { input_nodes.push_back(kernel);//遍历添加内核 } continue; } auto all_input_tensors = kernel->in_tensors(); // 从输入张量中删除所有常量张量 for (auto iter = all_input_tensors.begin(); iter != all_input_tensors.end();) {//遍历输入tensors if ((*iter)->IsConst()) { iter = all_input_tensors.erase(iter);//删除iter } else { iter++; } } for (const auto &kernel_in_subgraph : kernels) { // 从子图中的内核中删除输入张量 for (const auto *tensor : kernel_in_subgraph->out_tensors()) {//遍历输出Tensors auto ret = std::find(all_input_tensors.begin(), all_input_tensors.end(), tensor);//寻找相应的tensor if (ret != all_input_tensors.end()) {//判断是否包含 all_input_tensors.erase(ret); } } } // 如果某些输入张量不是来自子图中的内核 if (!all_input_tensors.empty()) {// if (!lite::IsContain(input_nodes, kernel)) { input_nodes.push_back(kernel);//添加内核 } } } return input_nodes;//返回输入节点 } //子图输出节点函数 std::vector LiteKernelUtil::SubgraphOutputNodes( const std::vector &kernels) { std::vector output_nodes; // 如果kernel没有post-kernel,kernel是图输出,一定是子图输出 for (const auto &kernel : kernels) {//遍历内核 if (kernel->is_model_output() || (kernel->out_kernels().empty() && !kernel->out_tensors().empty())) {//检查是否为空 if (!lite::IsContain(output_nodes, kernel)) {//判断该节点是否包含该内核 output_nodes.push_back(kernel);//添加内核 } continue; } for (const auto &output : kernel->out_kernels()) {//遍历输出内核 auto out_kernel_in_graph = std::find(kernels.begin(), kernels.end(), output);//寻找内核的位置 if (out_kernel_in_graph == kernels.end()) {//判断是否输出成功 if (!lite::IsContain(output_nodes, kernel)) { output_nodes.push_back(kernel);//遍历添加内核 } break; } } } return output_nodes;//返回输出节点 }
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\update_cache_cpu_kernel.cc code annotation

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// project headers
#include "backend/kernel_compiler/cpu/update_cache_cpu_kernel.h"
#include <string>
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
// initialize the kernel
void UpdateCacheCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);  // check the node is not null
  node_wpt_ = kernel_node;
  input_x_dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);  // dtype of input_x
  indices_dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 1);  // dtype of indices
  if (input_x_dtype_ == kNumberTypeFloat32 || input_x_dtype_ == kNumberTypeInt32) {
    input_x_dtype_size_ = 4;
  } else if (input_x_dtype_ == kNumberTypeFloat64 || input_x_dtype_ == kNumberTypeInt64) {
    input_x_dtype_size_ = 8;
  } else {  // input_x only supports float32, float64, int32 and int64
    MS_LOG(EXCEPTION) << "input_x dtype only support float32, float64, int32, int64";
  }
}

// dispatch on the indices dtype
bool UpdateCacheCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                  const std::vector<kernel::AddressPtr> & /*workspace*/,
                                  const std::vector<kernel::AddressPtr> &outputs) {
  if (indices_dtype_ == kNumberTypeInt32) {
    LaunchKernel<int>(inputs, outputs);
  } else if (indices_dtype_ == kNumberTypeInt64) {
    LaunchKernel<int64_t>(inputs, outputs);
  } else {
    MS_LOG(ERROR) << "indices dtype only support int32, int64";
    return false;
  }
  return true;
}

// perform the cache update
template <typename T>
void UpdateCacheCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                        const std::vector<kernel::AddressPtr> &outputs) {
  auto node_ = node_wpt_.lock();  // get the node back from the weak pointer
  if (!node_) {                   // the node has been destroyed
    MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
  }
  auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 1);
  auto update_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 2);
  batch_size_ = 1;
  for (size_t i = 0; i < indices_shape.size(); ++i) {  // batch size = product of the indices dims
    batch_size_ *= indices_shape[i];
  }
  MS_LOG(INFO) << "UpdateCache batch_size:" << batch_size_;
  update_size_ = 1;
  for (size_t i = 0; i < update_shape.size(); ++i) {
    update_size_ *= update_shape[i];
  }
  update_length_ = update_shape[1];
  // raw pointers into the kernel buffers
  char *input_x = reinterpret_cast<char *>(inputs[0]->addr);
  T *indices = reinterpret_cast<T *>(inputs[1]->addr);
  char *update = reinterpret_cast<char *>(inputs[2]->addr);
  max_num_ = *reinterpret_cast<T *>(inputs[3]->addr);
  size_t one_length_size = input_x_dtype_size_ * update_length_;
  auto max_size = inputs[0]->size;
  for (size_t i = 0; i < batch_size_; ++i) {
    if (indices[i] < 0 || indices[i] >= max_num_) continue;  // skip out-of-range rows
    char *tmp = update + i * one_length_size;
    if (indices[i] * one_length_size + one_length_size <= max_size) {  // bounds check before the copy
      int ret = memcpy_s(input_x + indices[i] * one_length_size,
                         max_size - indices[i] * one_length_size, tmp, one_length_size);
      if (ret != 0) {
        MS_LOG(EXCEPTION) << "memcpy_s error, errorno" << ret;
      }
    } else {  // the copy would run past the cache buffer
      MS_LOG(EXCEPTION) << "Memcpy out of size";
    }
  }
}
}  // namespace kernel
}  // namespace mindspore
```
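Stripped of the framework plumbing, the kernel's inner loop copies one `update` row per valid index into the cache, bounds-checking every copy. A minimal sketch on plain vectors (plain `memcpy` stands in for `memcpy_s`):

```c++
#include <cstring>
#include <iostream>
#include <vector>

// copy rows of `update` into `cache` at the positions named by `indices`,
// skipping out-of-range indices, as the kernel above does
template <typename T>
void UpdateCache(std::vector<float> *cache, const std::vector<T> &indices,
                 const std::vector<float> &update, size_t row_len, T max_num) {
  for (size_t i = 0; i < indices.size(); ++i) {
    if (indices[i] < 0 || indices[i] >= max_num) continue;  // invalid row: skip
    size_t dst = static_cast<size_t>(indices[i]) * row_len;
    if (dst + row_len <= cache->size()) {  // bounds check before the copy
      std::memcpy(cache->data() + dst, update.data() + i * row_len, row_len * sizeof(float));
    }
  }
}

int main() {
  std::vector<float> cache(6, 0.0f);           // cache of 3 rows x 2 cols
  std::vector<int> idx = {2, -1, 0};           // row -1 is skipped
  std::vector<float> upd = {1, 2, 3, 4, 5, 6};
  UpdateCache(&cache, idx, upd, 2, 3);
  for (float v : cache) std::cout << v << ' ';  // prints: 5 6 0 0 1 2
}
```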
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\unsorted_segment_sum_cpu_kernel.cc code annotation

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// project headers
#include "backend/kernel_compiler/cpu/unsorted_segment_sum_cpu_kernel.h"
#include <string>
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/thread_pool.h"

namespace mindspore {
namespace kernel {
// initialize the kernel
void UnsortedSegmentSumCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);  // check the node is not null
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 2) {  // the op requires exactly 2 inputs
    MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but UnsortedSegmentSum needs 2 input.";
  }
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != 1) {  // the op requires exactly 1 output
    MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but UnsortedSegmentSum needs 1 output.";
  }
  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);             // data dtype
  segment_ids_dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 1); // segment-id dtype
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  auto segment_ids_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
  for (size_t i = 0; i < input_shape.size(); ++i) {
    unit_num_ *= input_shape[i];
    if (i >= segment_ids_shape.size()) {
      input_dim1_ *= input_shape[i];  // product of the dims after the segment-id dims
    }
  }
  output_dim0_ = output_shape[0];
  for (size_t j = 1; j < output_shape.size(); j++) {
    output_dim1_ *= output_shape[j];
  }
}

// dispatch on the data and segment-id dtypes
bool UnsortedSegmentSumCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                         const std::vector<kernel::AddressPtr> & /*workspace*/,
                                         const std::vector<kernel::AddressPtr> &outputs) {
  bool ret{true};
  if (dtype_ == kNumberTypeInt32 && segment_ids_dtype_ == kNumberTypeInt32) {
    ret = LaunchKernel<int, int>(inputs, outputs);
  } else if (dtype_ == kNumberTypeFloat32 && segment_ids_dtype_ == kNumberTypeInt32) {
    ret = LaunchKernel<float, int>(inputs, outputs);
  } else if (dtype_ == kNumberTypeInt32 && segment_ids_dtype_ == kNumberTypeInt64) {
    ret = LaunchKernel<int, int64_t>(inputs, outputs);
  } else if (dtype_ == kNumberTypeFloat32 && segment_ids_dtype_ == kNumberTypeInt64) {
    ret = LaunchKernel<float, int64_t>(inputs, outputs);
  } else {  // only input_x int32/float32 with indices int32/int64 are supported
    MS_LOG(ERROR) << "Only support input_x int32 and float32, indices int32 and int64";
    return false;
  }
  return ret;
}

// perform the segment sum
template <typename S, typename T>
bool UnsortedSegmentSumCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                               const std::vector<kernel::AddressPtr> &outputs) {
  // raw pointers into the kernel buffers
  S *input_addr = reinterpret_cast<S *>(inputs[0]->addr);
  T *indices_addr = reinterpret_cast<T *>(inputs[1]->addr);
  S *output_addr = reinterpret_cast<S *>(outputs[0]->addr);
  auto ret = memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size);
  if (ret != EOK) {  // zeroing the output buffer failed
    MS_LOG(ERROR) << "Output buff memset fail. ret:" << ret;
    return false;
  }
  for (size_t i = 0; i < unit_num_; ++i) {
    size_t j = i / input_dim1_;  // which segment-id entry this element belongs to
    size_t k = i % input_dim1_;  // offset inside the inner dims
    T index = indices_addr[j];
    if (index < 0 || index >= SizeToInt(output_dim0_)) {
      continue;  // drop out-of-range segment ids
    }
    size_t output_index = index * output_dim1_ + k;
    output_addr[output_index] += input_addr[i];
  }
  return true;
}
}  // namespace kernel
}  // namespace mindspore
```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\unpack_cpu_kernel.cc code annotation

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// project headers
#include "backend/kernel_compiler/cpu/unpack_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {
namespace kernel {
// initialize the kernel
template <typename T>
void UnpackCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  int64_t axis_tmp = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "axis");
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);  // get the input shape
  if (axis_tmp < 0) {
    axis_tmp += SizeToLong(input_shape.size());  // normalize a negative axis
  }
  size_t axis_ = LongToSize(axis_tmp);
  output_num_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num"));  // number of outputs
  for (size_t i = 0; i < input_shape.size(); i++) {
    input_size_ *= input_shape[i];
    if (i > IntToSize(axis_)) {
      dims_after_axis_ *= input_shape[i];  // product of the dims after the axis
    }
  }
  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);  // get the input dtype
}

// reserve workspace for the per-output pointer table
template <typename T>
void UnpackCPUKernel<T>::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
  workspace_size_list_.emplace_back(sizeof(T *) * output_num_);
}

template <typename T>
bool UnpackCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                const std::vector<kernel::AddressPtr> &workspace,
                                const std::vector<kernel::AddressPtr> &outputs) {
  LaunchKernel(inputs, workspace, outputs);
  return true;
}

// scatter the input slices into the outputs
template <typename T>
void UnpackCPUKernel<T>::LaunchKernel(const std::vector<AddressPtr> &inputs,
                                      const std::vector<AddressPtr> &workspace,
                                      const std::vector<AddressPtr> &outputs) {
  input_ = reinterpret_cast<T *>(inputs[0]->addr);  // pointer to the input buffer
  MS_EXCEPTION_IF_NULL(input_);
  outputs_host_ = reinterpret_cast<T **>(workspace[0]->addr);
  MS_EXCEPTION_IF_NULL(outputs_host_);
  for (size_t i = 0; i < outputs.size(); i++) {  // collect the output buffer pointers
    outputs_host_[i] = reinterpret_cast<T *>(outputs[i]->addr);
    MS_EXCEPTION_IF_NULL(outputs_host_[i]);
  }
  size_t number_of_reset = output_num_ * dims_after_axis_;
  auto task = [this, number_of_reset](const size_t start, const size_t end) {
    for (size_t i = start; i < end; ++i) {
      size_t output_index = (i / dims_after_axis_) % output_num_;  // which output this element goes to
      size_t tensor_index = i / number_of_reset * dims_after_axis_ + i % dims_after_axis_;
      outputs_host_[output_index][tensor_index] = input_[i];
    }
  };
  CPUKernelUtils::ParallelFor(task, input_size_);
}

// check the parameters
template <typename T>
void UnpackCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);  // check the node is not null
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 1) {  // the op requires exactly 1 input
    MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but UnpackCPUKernel needs 1 input.";
  }
}
}  // namespace kernel
}  // namespace mindspore
```
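The `output_index`/`tensor_index` arithmetic in `LaunchKernel` is the whole algorithm; a single-threaded sketch on plain vectors reproduces it exactly:

```c++
#include <iostream>
#include <vector>

// split a flattened input into output_num slices along an axis, where
// dims_after_axis is the product of the dims after that axis
std::vector<std::vector<float>> Unpack(const std::vector<float> &input,
                                       size_t output_num, size_t dims_after_axis) {
  std::vector<std::vector<float>> outputs(
    output_num, std::vector<float>(input.size() / output_num));
  size_t number_of_reset = output_num * dims_after_axis;
  for (size_t i = 0; i < input.size(); ++i) {
    size_t output_index = (i / dims_after_axis) % output_num;  // which slice
    size_t tensor_index = i / number_of_reset * dims_after_axis + i % dims_after_axis;
    outputs[output_index][tensor_index] = input[i];
  }
  return outputs;
}

int main() {
  // input shape (2, 2, 2), axis = 1: output_num = 2, dims_after_axis = 2
  auto outs = Unpack({0, 1, 2, 3, 4, 5, 6, 7}, 2, 2);
  for (const auto &o : outs) {
    for (float v : o) std::cout << v << ' ';  // prints: 0 1 4 5  then  2 3 6 7
    std::cout << '\n';
  }
}
```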