• [Basics] mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\mkldnn\mkl
    /* Copyright 2020 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0. */
    // Project and standard headers
    #include "backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h"
    #include <vector>
    #include <string>
    #include <algorithm>
    #include "utils/ms_utils.h"
    #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"

    namespace mindspore {
    namespace kernel {  // nested namespace
    void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode,
                                  const std::vector<size_t> &src_shape, const std::vector<size_t> &kernel_size,
                                  const std::vector<int> &stride, std::vector<int> *padding_l, std::vector<int> *padding_r,
                                  const std::vector<int> &dilation) {
      MS_EXCEPTION_IF_NULL(kernel_node);  // throw if the node is null
      if (src_shape.size() < 2) {  // padding needs at least two spatial dims
        MS_LOG(EXCEPTION) << "set pad only support src dim >= 2!";
      }
      std::vector<int> weight_height;  // the last two dims of the input: height and width
      weight_height.emplace_back(src_shape[src_shape.size() - 2]);
      weight_height.emplace_back(src_shape[src_shape.size() - 1]);
      MS_LOG(INFO) << "pad mode: " << pad_mode;
      if (pad_mode == PAD_MODE_LOWER_SAME || pad_mode == PAD_MODE_UPPER_SAME) {
        // SAME: output = ceil(input / stride); split the total padding, extra goes to the right
        for (size_t i = 0; i < weight_height.size(); ++i) {
          auto wh = weight_height[i];
          int out = (wh + stride[i] - 1) / stride[i];
          int effective_k = (SizeToInt(kernel_size[i]) - 1) * dilation[i] + 1;
          int pad_along = std::max(0, (out - 1) * stride[i] + effective_k - wh);
          int pad = pad_along / 2;
          padding_l->emplace_back(pad);
          padding_r->emplace_back(pad_along - pad);
        }
      } else if (pad_mode == PAD_MODE_LOWER_VALID || pad_mode == PAD_MODE_UPPER_VALID) {
        // VALID: no padding at all
        MS_LOG(INFO) << "pad valid";
        padding_l->emplace_back(0);
        padding_l->emplace_back(0);
        padding_r->emplace_back(0);
        padding_r->emplace_back(0);
      } else {
        // explicit PAD mode: read the pad_list attribute (top, bottom, left, right)
        std::vector<int> pad;
        std::vector<int64_t> pad_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, PAD_LIST);
        (void)std::transform(pad_me.begin(), pad_me.end(), std::back_inserter(pad),
                             [](const int64_t &value) { return static_cast<int>(value); });
        padding_l->emplace_back(pad[0]);
        padding_l->emplace_back(pad[2]);
        padding_r->emplace_back(pad[1]);
        padding_r->emplace_back(pad[3]);
      }
    }

    bool MKLCPUKernel::BinaryBroadCast(std::vector<size_t> *src0_shape, std::vector<size_t> *src1_shape,
                                       std::vector<size_t> *dst_shape) {
      MS_EXCEPTION_IF_NULL(src0_shape);  // throw on null pointers
      MS_EXCEPTION_IF_NULL(src1_shape);
      MS_EXCEPTION_IF_NULL(dst_shape);
      bool need_swap = false;
      if (dst_shape->size() == 0) {  // scalar case: promote every shape to rank 1
        dst_shape->emplace_back(1);
        src0_shape->emplace_back(1);
        src1_shape->emplace_back(1);
      }
      MS_LOG(DEBUG) << "Binary broadcast in: src0: " << *src0_shape << " src1: " << *src1_shape << " dst: " << *dst_shape;
      if (src0_shape->size() != dst_shape->size()) {
        // src0 has lower rank: prepend as many 1s as the rank difference
        need_swap = true;
        for (size_t i = src0_shape->size(); i < dst_shape->size(); ++i) {
          src0_shape->insert(src0_shape->begin(), 1);
        }
      } else if (src1_shape->size() != dst_shape->size()) {
        // src1 has lower rank: prepend as many 1s as the rank difference
        for (size_t i = src1_shape->size(); i < dst_shape->size(); ++i) {
          src1_shape->insert(src1_shape->begin(), 1);
        }
      }
      if (src0_shape->size() == src1_shape->size()) {
        bool visit_src0 = false;  // true once src0 has supplied a broadcast dim of 1
        bool visit_src1 = false;  // true once src1 has supplied a broadcast dim of 1
        for (size_t i = 0; i < src0_shape->size(); ++i) {
          if (src0_shape->at(i) != src1_shape->at(i)) {
            // wherever the dims differ, exactly one side must be 1, and only one side may ever broadcast
            if (src0_shape->at(i) == 1 && !visit_src1) {  // src0 broadcasts, so the operands must be swapped
              need_swap = true;
              visit_src0 = true;
            } else if (src1_shape->at(i) == 1 && !visit_src0) {  // src1 broadcasts, no swap needed
              need_swap = false;
              visit_src1 = true;
            } else {  // both sides (or neither) try to broadcast: invalid
              MS_LOG(EXCEPTION) << "Invalid broadcast! " << *src0_shape << " vs " << *src1_shape;
            }
          }
        }
      } else {
        MS_LOG(EXCEPTION) << "Invalid broadcast! src0: " << *src0_shape << " src1: " << *src1_shape
                          << " dst: " << *dst_shape;
      }
      MS_LOG(DEBUG) << "Binary broadcast out: src0: " << *src0_shape << " src1: " << *src1_shape << " dst: " << *dst_shape;
      return need_swap;
    }

    dnnl::memory::format_tag MKLCPUKernel::GetDefaultFormatTag(const dnnl::memory::dims &dims) const {
      dnnl::memory::format_tag mem_tag;
      auto dim_size = dims.size();
      // pick the plain row-major tag that matches the rank
      if (dim_size == 5) {
        mem_tag = dnnl::memory::format_tag::abcde;
      } else if (dim_size == 4) {
        mem_tag = dnnl::memory::format_tag::abcd;
      } else if (dim_size == 3) {
        mem_tag = dnnl::memory::format_tag::abc;
      } else if (dim_size == 2) {
        mem_tag = dnnl::memory::format_tag::ab;
      } else if (dim_size == 1) {
        mem_tag = dnnl::memory::format_tag::a;
      } else {
        MS_LOG(EXCEPTION) << "kernel dims invalid " << dim_size;  // unsupported rank
      }
      return mem_tag;
    }

    dnnl::memory::desc MKLCPUKernel::GetDefaultMemDesc(const std::vector<size_t> &shape) {
      dnnl::memory::dims dims;
      if (shape.size() == 0) {
        dims.insert(dims.end(), 1);  // treat a scalar as shape {1}
      } else {
        dims.insert(dims.end(), shape.begin(), shape.end());
      }
      dnnl::memory::format_tag mem_tag = GetDefaultFormatTag(dims);
      dnnl::memory::desc mem_desc(dims, dnnl::memory::data_type::f32, mem_tag);
      return mem_desc;
    }

    void MKLCPUKernel::AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc) {
      arguments_[arg_key] = MKLKernelEngine::Get().CreateMemory(mem_desc, alloc);
    }

    void MKLCPUKernel::SetArgumentHandle(int arg_key, void *ptr) {
      auto arg_iter = arguments_.find(arg_key);
      if (arg_iter != arguments_.end()) {
        arg_iter->second.set_data_handle(ptr);
      }
    }

    void MKLCPUKernel::ExecutePrimitive() { MKLKernelEngine::Get().Execute(primitive_, arguments_); }

    void MKLCPUKernel::Reorder(dnnl::memory *src_mem, dnnl::memory *dst_mem) {
      MKLKernelEngine::Get().Reorder(src_mem, dst_mem);
    }
    }  // namespace kernel
    }  // namespace mindspore
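    The SAME branch above is the standard ceil-mode padding rule. A minimal standalone sketch of the same arithmetic (plain C++, no MindSpore types; SamePadding is a hypothetical helper mirroring that branch, not part of the source) that can be compiled to sanity-check the numbers:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // For each spatial dim: output = ceil(in / stride), and the total padding
    // needed for that output is split with the smaller half on the left.
    void SamePadding(const std::vector<int> &in, const std::vector<int> &kernel,
                     const std::vector<int> &stride, const std::vector<int> &dilation,
                     std::vector<int> *pad_l, std::vector<int> *pad_r) {
      for (size_t i = 0; i < in.size(); ++i) {
        int out = (in[i] + stride[i] - 1) / stride[i];        // ceil division
        int effective_k = (kernel[i] - 1) * dilation[i] + 1;  // dilated kernel extent
        int pad_along = std::max(0, (out - 1) * stride[i] + effective_k - in[i]);
        pad_l->push_back(pad_along / 2);
        pad_r->push_back(pad_along - pad_along / 2);
      }
    }

    int main() {
      std::vector<int> pad_l, pad_r;
      // 5x5 input, 3x3 kernel, stride 2, no dilation -> 3x3 output, pad 1 on each side
      SamePadding({5, 5}, {3, 3}, {2, 2}, {1, 1}, &pad_l, &pad_r);
      for (size_t i = 0; i < pad_l.size(); ++i)
        std::printf("dim %zu: pad_l=%d pad_r=%d\n", i, pad_l[i], pad_r[i]);
    }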
  • [Activity Experience] mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\mkldnn\lst
    /* Copyright 2020 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0. */
    // Project and standard headers
    #include "backend/kernel_compiler/cpu/mkldnn/lstm_cpu_kernel.h"
    #include <string>
    #include "utils/ms_utils.h"
    #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
    #include "runtime/device/cpu/cpu_device_address.h"

    namespace mindspore {
    namespace kernel {
    const int kMaxLSTMLayer = 100;        // upper bound on the number of LSTM layers
    const int kOutputWorkSpaceIndex = 3;  // the 4th output holds the training workspace

    void LstmCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
      CPUKernel::InitInputOutputSize(kernel_node);
      output_size_list_[kOutputWorkSpaceIndex] = reserve_size_;
    }

    void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) {
    #ifdef PLATFORM_86
      // flush denormal floats to zero on x86 to avoid slow FP paths
      _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
      _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
    #endif
      MS_EXCEPTION_IF_NULL(kernel_node);
      using tag = dnnl::memory::format_tag;
      using dim = dnnl::memory::dims;
      CheckParam(kernel_node);  // validate the node's attributes and shapes
      auto eng = MKLKernelEngine::Get().engine();
      dnnl::stream s(eng);
      dnnl::rnn_direction direction = dnnl::rnn_direction::unidirectional;
      if (bidirectional_) {
        direction = dnnl::rnn_direction::bidirectional_concat;
      }
      // assemble the oneDNN dims for sources, weights, bias and destinations
      dim src_dims = {seq_len_, batch_size_, input_size_};
      dim src_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
      dim src_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
      weights_dims_ = {num_layers_, num_directions_, input_size_, 4, hidden_size_};
      weights_h_dims_ = {num_layers_, num_directions_, hidden_size_, 4, hidden_size_};
      bias_dims_ = {num_layers_, num_directions_, 4, hidden_size_};
      dim dst_dims = {seq_len_, batch_size_, static_cast<int64_t>(hidden_size_) * num_directions_};
      dim dst_h_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
      dim dst_c_dims = {num_layers_, num_directions_, batch_size_, hidden_size_};
      dnnl::memory::desc src_desc = formatted_md(src_dims, tag::tnc);
      dnnl::memory::desc src_h_desc = formatted_md(src_h_dims, tag::ldnc);
      dnnl::memory::desc src_c_desc = formatted_md(src_c_dims, tag::ldnc);
      dnnl::memory::desc bias_desc = formatted_md(bias_dims_, tag::ldgo);
      dnnl::memory::desc dst_desc = formatted_md(dst_dims, tag::tnc);
      dnnl::memory::desc dst_h_desc = formatted_md(dst_h_dims, tag::ldnc);
      dnnl::memory::desc dst_c_desc = formatted_md(dst_c_dims, tag::ldnc);
      // decide between training and inference propagation kinds
      if (!kernel_node->HasAttr(kAttrIsTraining)) {
        MS_LOG(WARNING) << "LSTM has no attr is_training";
      }
      is_training = GetValue<bool>(kernel_node->GetAttr(kAttrIsTraining));
      auto prop_kind = dnnl::prop_kind::forward_training;
      if (!is_training) {
        prop_kind = dnnl::prop_kind::forward_inference;
      }
      auto desc = std::make_shared<dnnl::lstm_forward::desc>(
        prop_kind, direction, src_desc, src_h_desc, src_c_desc, formatted_md(weights_dims_, tag::any),
        formatted_md(weights_h_dims_, tag::any), bias_desc, dst_desc, dst_h_desc, dst_c_desc);
      prim_desc_ = dnnl::lstm_forward::primitive_desc(*desc, eng);
      primitive_ = std::make_shared<dnnl::lstm_forward>(prim_desc_);
      if (is_training) {
        // training needs the workspace where oneDNN stashes forward activations
        reserve_size_ = static_cast<size_t>(prim_desc_.workspace_desc().get_size());
        AddArgument(DNNL_ARG_WORKSPACE, prim_desc_.workspace_desc());
      } else {
        reserve_size_ = 1;
      }
      AddArgument(DNNL_ARG_SRC_LAYER, src_desc);
      AddArgument(DNNL_ARG_SRC_ITER, src_h_desc);
      AddArgument(DNNL_ARG_SRC_ITER_C, src_c_desc);
      AddArgument(DNNL_ARG_WEIGHTS_LAYER, prim_desc_.weights_layer_desc());
      AddArgument(DNNL_ARG_WEIGHTS_ITER, prim_desc_.weights_iter_desc());
      AddArgument(DNNL_ARG_BIAS, bias_desc);
      AddArgument(DNNL_ARG_DST_LAYER, dst_desc);
      AddArgument(DNNL_ARG_DST_ITER, dst_h_desc);
      AddArgument(DNNL_ARG_DST_ITER_C, dst_c_desc);
    }

    // Validate node attributes and derive the flat weight/bias buffer layout.
    void LstmCPUKernel::CheckParam(const CNodePtr &kernel_node) {
      std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
      std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
      std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
      bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
      input_size_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "input_size"));
      hidden_size_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "hidden_size"));
      num_layers_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num_layers"));
      has_bias_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "has_bias");  // whether a bias block follows the weights
      batch_size_ = SizeToInt(src_shape[1]);
      seq_len_ = SizeToInt(src_shape[0]);
      num_directions_ = 1;
      if (bidirectional_) {
        num_directions_ = 2;
      }
      const int gate_size = 4 * hidden_size_;  // the 4 LSTM gates share one weight block
      // the layer count must lie in (0, kMaxLSTMLayer]
      if (num_layers_ <= 0) {
        MS_LOG(EXCEPTION) << "layers must be greater than zero!";
      }
      if (num_layers_ > kMaxLSTMLayer) {
        MS_LOG(EXCEPTION) << "layers must be lower than 100!";
      }
      for (int i = 0; i < num_layers_; ++i) {
        // layer 0 reads the raw input; deeper layers read hidden_size * num_directions features
        weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
        weight_h_size_ += gate_size * hidden_size_;
      }
      weight_size_ = weight_size_ * num_directions_;
      weight_h_size_ = weight_h_size_ * num_directions_;
      if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) {
        MS_LOG(EXCEPTION) << "error iteration shape!";
      }
      if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) {
        MS_LOG(EXCEPTION) << "lstm only support 3-D input!";
      }
    }

    bool LstmCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                               const std::vector<kernel::AddressPtr> & /*workspace*/,
                               const std::vector<kernel::AddressPtr> &outputs) {
      using dt = dnnl::memory::data_type;
      using tag = dnnl::memory::format_tag;
      auto eng = MKLKernelEngine::Get().engine();
      // the user supplies weights in ldgoi order; reorder them into the layout the primitive chose
      auto user_weights_memory = dnnl::memory(dnnl::memory::desc{{weights_dims_}, dt::f32, tag::ldgoi}, eng);
      auto user_weights_h_memory = dnnl::memory(dnnl::memory::desc{{weights_h_dims_}, dt::f32, tag::ldgoi}, eng);
      auto weights_memory = dnnl::memory(prim_desc_.weights_layer_desc(), eng);
      auto weights_h_memory = dnnl::memory(prim_desc_.weights_iter_desc(), eng);
      user_weights_memory.set_data_handle(inputs[3]->addr);
      user_weights_h_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_);
      Reorder(&user_weights_memory, &weights_memory);
      Reorder(&user_weights_h_memory, &weights_h_memory);
      auto bias_memory = dnnl::memory(prim_desc_.bias_desc(), eng);
      if (has_bias_) {
        // the bias follows the two weight blocks inside inputs[3]
        bias_memory.set_data_handle(reinterpret_cast<float *>(inputs[3]->addr) + weight_size_ + weight_h_size_);
      } else {
        if (memset_s(bias_memory.get_data_handle(), prim_desc_.bias_desc().get_size(), 0,
                     prim_desc_.bias_desc().get_size())) {
          MS_LOG(EXCEPTION) << "bias memset error";  // zeroing the bias buffer failed
        }
      }
      // bind every argument's data handle, then run the primitive
      SetArgumentHandle(DNNL_ARG_SRC_LAYER, inputs[0]->addr);
      SetArgumentHandle(DNNL_ARG_SRC_ITER, inputs[1]->addr);
      SetArgumentHandle(DNNL_ARG_SRC_ITER_C, inputs[2]->addr);
      SetArgumentHandle(DNNL_ARG_WEIGHTS_LAYER, weights_memory.get_data_handle());
      SetArgumentHandle(DNNL_ARG_WEIGHTS_ITER, weights_h_memory.get_data_handle());
      SetArgumentHandle(DNNL_ARG_BIAS, bias_memory.get_data_handle());
      SetArgumentHandle(DNNL_ARG_DST_LAYER, outputs[0]->addr);
      SetArgumentHandle(DNNL_ARG_DST_ITER, outputs[1]->addr);
      SetArgumentHandle(DNNL_ARG_DST_ITER_C, outputs[2]->addr);
      if (is_training) {
        SetArgumentHandle(DNNL_ARG_WORKSPACE, outputs[3]->addr);
      }
      ExecutePrimitive();
      return true;
    }
    }  // namespace kernel
    }  // namespace mindspore
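    How weight_size_ and weight_h_size_ partition the flat parameter buffer inputs[3] is easiest to see with concrete numbers. A standalone sketch under assumed sizes (not MindSpore API, just the same arithmetic as CheckParam):

    #include <cstdio>

    int main() {
      const int input_size = 10, hidden_size = 16, num_layers = 2;
      const bool bidirectional = true;
      const int num_directions = bidirectional ? 2 : 1;
      const int gate_size = 4 * hidden_size;  // 4 gates per cell

      int weight_size = 0, weight_h_size = 0;
      for (int i = 0; i < num_layers; ++i) {
        // layer 0 consumes the input width; deeper layers consume hidden*directions
        weight_size += gate_size * (i == 0 ? input_size : hidden_size * num_directions);
        weight_h_size += gate_size * hidden_size;  // recurrent weights: hidden x 4*hidden
      }
      weight_size *= num_directions;
      weight_h_size *= num_directions;
      std::printf("weight_size=%d weight_h_size=%d\n", weight_size, weight_h_size);
      // weight_size is the float offset where the recurrent weights start, and
      // weight_size + weight_h_size is where the optional bias starts.
    }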
  • [Activity Experience] mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\l2normaliz
    /* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0. */
    // Project headers
    #include "backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.h"
    #include "runtime/device/cpu/cpu_device_address.h"

    namespace mindspore {
    namespace kernel {
    template <typename T>
    void L2NormalizeGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
      MS_EXCEPTION_IF_NULL(kernel_node);
      CheckIONumber(kernel_node);
      for (size_t i = 0; i < INPUT_SIZE; i++) {  // record each input's inferred shape
        input_shape_list_.emplace_back(AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i));
      }
      auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
      CheckInputShape(output_shape);  // every input shape must equal the output shape
      int output_dim_length = output_shape.size();
      dim_elem_num_list_.resize(output_dim_length, 1);
      // row-major strides: dim_elem_num_list_[i] = product of the dims after i
      for (int i = output_dim_length - 2; i >= 0; i--) {
        dim_elem_num_list_[i] = output_shape[i + 1] * dim_elem_num_list_[i + 1];
      }
      int axis = LongToInt(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "axis"));
      int input_dim_length = SizeToInt(input_shape_list_[0].size());
      axis_ = axis < 0 ? (axis + input_dim_length) : axis;  // normalize a negative axis
      epsilon_ = static_cast<T>(AnfAlgo::GetNodeAttr<float>(kernel_node, "epsilon"));
    }

    template <typename T>
    bool L2NormalizeGradCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs,
                                             const std::vector<AddressPtr> &workspace,
                                             const std::vector<AddressPtr> &outputs) {
      auto input_x = reinterpret_cast<T *>(inputs[0]->addr);
      auto y = reinterpret_cast<T *>(inputs[1]->addr);
      auto dout = reinterpret_cast<T *>(inputs[2]->addr);
      auto output = reinterpret_cast<T *>(outputs[0]->addr);
      auto output_size = outputs[0]->size / sizeof(T);
      // each output element gathers the fibers along axis_ that pass through it
      auto task = [&](size_t start, size_t end) {
        for (size_t i = start; i < end; i++) {
          std::vector<size_t> high_dim_index;
          OneDimIndexToHighDimIndex(i, &high_dim_index);
          std::vector<T> input_x_vector;
          GetVector(&input_x_vector, high_dim_index, input_x);
          std::vector<T> dout_vector;
          GetVector(&dout_vector, high_dim_index, dout);
          std::vector<T> y_vector;
          GetVector(&y_vector, high_dim_index, y);
          GetOutput(input_x_vector, y_vector, dout_vector, high_dim_index, &output[i]);
        }
      };
      CPUKernelUtils::ParallelFor(task, output_size);
      return true;
    }

    template <typename T>
    void L2NormalizeGradCPUKernel<T>::CheckInputShape(const std::vector<size_t> &output_shape) {
      for (const auto &shape : input_shape_list_) {
        // every input shape must match the output shape exactly
        if (output_shape != shape) {
          MS_LOG(EXCEPTION) << "Input shape and output shape should be same.";
        }
      }
      auto input_x_shape = input_shape_list_[0];
      // reject inputs with a zero-sized dimension
      if (input_x_shape.size() != 0) {
        if (std::any_of(input_x_shape.begin(), input_x_shape.end(), [](size_t i) { return i == 0; })) {
          MS_LOG(EXCEPTION) << "L2NormalizeCPUKernel input is null.";
        }
      }
    }

    template <typename T>
    void L2NormalizeGradCPUKernel<T>::CheckIONumber(const CNodePtr &kernel_node) {
      size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
      if (input_num != INPUT_SIZE) {  // exactly 3 inputs expected
        MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but L2NormalizeGradCPUKernel needs 3 input.";
      }
      size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
      if (output_num != OUTPUT_SIZE) {  // exactly 1 output expected
        MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but L2NormalizeGradCPUKernel needs 1 output.";
      }
    }

    template <typename T>
    void L2NormalizeGradCPUKernel<T>::OneDimIndexToHighDimIndex(size_t one_dim_index, std::vector<size_t> *high_dim_index) {
      // flat index -> multi-dim index by repeated division with the strides
      for (const auto &item : dim_elem_num_list_) {
        high_dim_index->push_back(one_dim_index / item);
        one_dim_index %= item;
      }
    }

    template <typename T>
    void L2NormalizeGradCPUKernel<T>::HighDimIndexToOneDimIndex(size_t *one_dim_index,
                                                                const std::vector<size_t> &high_dim_index) {
      // multi-dim index -> flat index: dot product with the strides
      *one_dim_index = 0;
      int len = high_dim_index.size();
      for (int i = 0; i < len; i++) {
        *one_dim_index += high_dim_index[i] * dim_elem_num_list_[i];
      }
    }

    template <typename T>
    void L2NormalizeGradCPUKernel<T>::GetVector(std::vector<T> *x_vector, const std::vector<size_t> &high_dim_index,
                                                const T *x) {
      auto x_shape = input_shape_list_[0];
      // collect the fiber of x along axis_ that passes through high_dim_index
      for (size_t i = 0; i < x_shape[axis_]; i++) {
        size_t oneDimIndex = 0;
        std::vector<size_t> tmp_high_dim_index = high_dim_index;
        tmp_high_dim_index[axis_] = i;
        HighDimIndexToOneDimIndex(&oneDimIndex, tmp_high_dim_index);
        x_vector->push_back(x[oneDimIndex]);
      }
    }

    template <typename T>
    void L2NormalizeGradCPUKernel<T>::GetSumOfProduct(const std::vector<T> &x_vector, const std::vector<T> &y_vector,
                                                      T *ss) {
      // pairwise (tree) reduction of sum(x[i] * y[i])
      size_t len = x_vector.size();
      std::vector<T> tmp_vector(len);
      for (size_t i = 0; i < len; i++) {
        tmp_vector[i] = x_vector[i] * y_vector[i];
      }
      if (len % 2 == 1) {
        tmp_vector[0] += tmp_vector[len - 1];
      }
      for (size_t stride = len / 2; stride > 0; stride >>= 1) {
        for (size_t i = 0; i < stride; i++) {
          tmp_vector[i] += tmp_vector[i + stride];
        }
        if (stride > 2 && stride % 2 == 1) {
          tmp_vector[0] += tmp_vector[stride - 1];
        }
      }
      *ss = tmp_vector[0];
    }

    template <typename T>
    void L2NormalizeGradCPUKernel<T>::GetOutput(const std::vector<T> &input_x_vector, const std::vector<T> &y_vector,
                                                const std::vector<T> &dout_vector,
                                                const std::vector<size_t> &high_dim_index, T *output) {
      // dx = (dout - y * sum(y * dout)) / max(||x||, epsilon)
      size_t axis_index = high_dim_index[axis_];
      T dout = dout_vector[axis_index];
      T y = y_vector[axis_index];
      T tmp_sum1;
      GetSumOfProduct(y_vector, dout_vector, &tmp_sum1);
      T tmp_sum2;
      GetSumOfProduct(input_x_vector, input_x_vector, &tmp_sum2);
      tmp_sum2 = sqrt(tmp_sum2);
      if (tmp_sum2 >= epsilon_) {
        *output = (dout - y * tmp_sum1) / tmp_sum2;
      } else {
        *output = (dout - y * tmp_sum1) / epsilon_;
      }
    }
    }  // namespace kernel
    }  // namespace mindspore
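    The two index helpers are plain row-major stride arithmetic. A compilable round-trip check (standalone C++; Strides is a hypothetical stand-in for the dim_elem_num_list_ computation in InitKernel):

    #include <cstdio>
    #include <vector>

    // strides[i] = number of elements one step along dim i spans
    std::vector<size_t> Strides(const std::vector<size_t> &shape) {
      std::vector<size_t> s(shape.size(), 1);
      for (int i = static_cast<int>(shape.size()) - 2; i >= 0; --i) s[i] = shape[i + 1] * s[i + 1];
      return s;
    }

    int main() {
      std::vector<size_t> shape = {2, 3, 4};
      auto strides = Strides(shape);  // {12, 4, 1}
      size_t flat = 17;
      std::vector<size_t> idx;
      size_t rest = flat;
      for (auto st : strides) { idx.push_back(rest / st); rest %= st; }  // flat -> (1, 1, 1)
      size_t back = 0;
      for (size_t i = 0; i < idx.size(); ++i) back += idx[i] * strides[i];  // and back again
      std::printf("flat=%zu -> (%zu,%zu,%zu) -> %zu\n", flat, idx[0], idx[1], idx[2], back);
    }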
  • [Activity Experience] mindspore\ccsrc\backend\kernel_query.cc code walkthrough
    /* Copyright 2019 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0. */
    // System and project headers
    #include "backend/kernel_compiler/kernel_query.h"
    #include <memory>
    #include <algorithm>
    #include "backend/kernel_compiler/aicpu/aicpu_kernel_metadata.h"
    #include "backend/kernel_compiler/host/host_kernel_metadata.h"
    #include "backend/kernel_compiler/rts/rt_kernel_info.h"
    #include "backend/kernel_compiler/hccl/hccl_kernel_metadata.h"
    #include "backend/kernel_compiler/tbe/tbe_kernel_select/tbe_kernel_select.h"
    #include "backend/kernel_compiler/akg/akg_kernel_metadata.h"
    #include "backend/session/anf_runtime_algorithm.h"
    #include "utils/ms_context.h"
    #include "utils/trace_base.h"

    namespace mindspore {
    namespace kernel {
    namespace {
    // Drop every candidate kernel info whose input/output counts differ from the node's.
    void FilterInvalidKernelInfo(const CNodePtr &kernel_node,
                                 std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
      MS_EXCEPTION_IF_NULL(kernel_info_list);
      MS_EXCEPTION_IF_NULL(kernel_node);
      size_t output_tensor_num = AnfAlgo::GetOutputTensorNum(kernel_node);  // the node's actual output count
      size_t input_tensor_num = AnfAlgo::GetInputTensorNum(kernel_node);    // the node's actual input count
      std::vector<std::shared_ptr<kernel::KernelBuildInfo>> filtered_list;
      (void)std::copy_if(
        kernel_info_list->begin(), kernel_info_list->end(), std::back_inserter(filtered_list),
        [output_tensor_num, input_tensor_num](const std::shared_ptr<kernel::KernelBuildInfo> &kernel_build_info) {
          return kernel_build_info->GetOutputNum() == output_tensor_num &&
                 kernel_build_info->GetInputNum() == input_tensor_num;
        });
      if (!filtered_list.empty()) {
        // keep only the surviving candidates
        kernel_info_list->clear();
        (void)std::copy(filtered_list.begin(), filtered_list.end(), std::back_inserter(*kernel_info_list));
      } else {
        // no candidate matched: log why each one failed, then clear the list
        MS_LOG(INFO) << "All kernel Info list does not match any kernel info ";
        for (size_t index = 0; index < kernel_info_list->size(); ++index) {
          std::ostringstream buffer;
          auto &kernel_info = kernel_info_list->at(index);
          MS_EXCEPTION_IF_NULL(kernel_info);
          if (kernel_info->GetOutputNum() != output_tensor_num) {  // the output counts failed to match
            buffer << "Kernel node's output size [" << output_tensor_num << "]"
                   << " cannot match the kernel's output size [" << kernel_info->GetOutputNum() << "]";
          } else {  // otherwise the input counts failed to match
            buffer << "Kernel node's input size [" << input_tensor_num << "]"
                   << " cannot match the kernel's input size [" << kernel_info->GetInputNum() << "]";
          }
          MS_LOG(INFO) << "kernel [ " << index << " ] :" << kernel_info->ToString() << buffer.str();
        }
        kernel_info_list->clear();
        MS_LOG(INFO) << "node" << kernel_node->DebugString() << "'s output size : [" << output_tensor_num << "]"
                     << "input size : [" << input_tensor_num << "] cannot match any kernelInfo !";
      }
    }
    }  // namespace

    // Query every backend in priority order: TBE, then AICPU, RTS, HCCL, HOST.
    void KernelQueryAll(const CNodePtr &kernel_node,
                        std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
      MS_EXCEPTION_IF_NULL(kernel_node);
      MS_EXCEPTION_IF_NULL(kernel_info_list);
      std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
      TbeMetadataInfo(kernel_node, kernel_info_list);
      if (kernel_info_list->empty()) {  // TBE found nothing: fall back to AICPU
        AicpuMetadataInfo(kernel_node, kernel_info_list);
        if (!kernel_info_list->empty()) {
          MS_LOG(INFO) << "The node [" << kernel_node->DebugString()
                       << "] cannot find valid TBE kernel info, try to get aicpu kernel info";
          AnfAlgo::SetNodeAttr(kAttrIsAICPUKernel, MakeValue(true), kernel_node);
        }
      }
      if (kernel_info_list->empty()) {
        GetRtKelInfo(kernel_node, kernel_info_list);
      }
      if (kernel_info_list->empty()) {
        HcclMetadataInfo(kernel_node, kernel_info_list);
      }
      if (kernel_info_list->empty()) {
        HostMetadataInfo(kernel_node, kernel_info_list);
      }
      if (kernel_info_list->empty()) {
        MS_EXCEPTION(NotExistsError) << "Can not find any available operator info for op [" << op_name << ", "
                                     << kernel_node->fullname_with_scope()
                                     << "]. Node DebugString:" << kernel_node->DebugString()
                                     << ", maybe the operator is not supported on the current platform. \n trace "
                                     << trace::DumpSourceLines(kernel_node);
      }
    }

    void KernelQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list,
                     KernelType kernel_type) {
      MS_EXCEPTION_IF_NULL(kernel_node);
      MS_EXCEPTION_IF_NULL(kernel_info_list);
      auto context_ptr = MsContext::GetInstance();
      MS_EXCEPTION_IF_NULL(context_ptr);
      const PrimitivePtr kPrimProdForceSeA = std::make_shared<Primitive>("ProdForceSeA");
      if (IsPrimitiveCNode(kernel_node, kPrimProdForceSeA)) {
        kernel_type = KernelType::AKG_KERNEL;  // this op is always built by AKG
      }
      switch (kernel_type) {  // dispatch on the kernel type
        case KernelType::AKG_KERNEL:
          AkgMetadataInfo(kernel_node, kernel_info_list);
          break;
        default:
          KernelQueryAll(kernel_node, kernel_info_list);
          break;
      }
      if (kernel_info_list->empty()) {  // nothing was found by any backend
        MS_EXCEPTION(NotExistsError) << "Can not find any available operator info for op ["
                                     << AnfAlgo::GetCNodeName(kernel_node) << ", " << kernel_node->fullname_with_scope()
                                     << "]. Node DebugString:" << kernel_node->DebugString()
                                     << ", maybe the operator is not supported on the current platform. \n trace "
                                     << trace::DumpSourceLines(kernel_node);
      }
      // finally drop candidates whose input/output counts do not match the node
      FilterInvalidKernelInfo(kernel_node, kernel_info_list);
    }

    void AICPUQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
      MS_EXCEPTION_IF_NULL(kernel_node);
      MS_EXCEPTION_IF_NULL(kernel_info_list);
      kernel_info_list->clear();
      AicpuMetadataInfo(kernel_node, kernel_info_list);
      FilterInvalidKernelInfo(kernel_node, kernel_info_list);
    }

    bool IsSupportedByAICPU(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr &select_kernel_build_info) {
      MS_EXCEPTION_IF_NULL(kernel_node);
      MS_EXCEPTION_IF_NULL(select_kernel_build_info);
      std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
      auto cnode = kernel_node->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(cnode);
      AICPUQuery(cnode, &kernel_info_list);
      // a similar build info is good enough for AICPU
      return std::any_of(kernel_info_list.begin(), kernel_info_list.end(),
                         [&select_kernel_build_info](const kernel::KernelBuildInfoPtr item) {
                           MS_EXCEPTION_IF_NULL(item);
                           return item->IsSimilarityKernelBuildInfo(*select_kernel_build_info);
                         });
    }

    bool IsSupportedByAICore(const AnfNodePtr &kernel_node, const KernelBuildInfoPtr &select_kernel_build_info) {
      MS_EXCEPTION_IF_NULL(kernel_node);
      MS_EXCEPTION_IF_NULL(select_kernel_build_info);
      std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
      auto cnode = kernel_node->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(cnode);
      TbeMetadataInfo(cnode, &kernel_info_list);
      // AICore requires an exact build-info match
      return std::any_of(kernel_info_list.begin(), kernel_info_list.end(),
                         [&select_kernel_build_info](const kernel::KernelBuildInfoPtr item) {
                           MS_EXCEPTION_IF_NULL(item);
                           return *item == *select_kernel_build_info;
                         });
    }
    }  // namespace kernel
    }  // namespace mindspore
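    The copy_if filter is the load-bearing piece of FilterInvalidKernelInfo. A self-contained miniature with a hypothetical KernelInfo struct standing in for the real KernelBuildInfo:

    #include <algorithm>
    #include <cstdio>
    #include <iterator>
    #include <vector>

    struct KernelInfo { size_t inputs; size_t outputs; };  // assumed stand-in type

    int main() {
      std::vector<KernelInfo> candidates = {{2, 1}, {3, 1}, {2, 2}};
      const size_t node_inputs = 2, node_outputs = 1;  // the node's actual tensor counts
      std::vector<KernelInfo> filtered;
      // keep only candidates whose declared counts match the node
      std::copy_if(candidates.begin(), candidates.end(), std::back_inserter(filtered),
                   [&](const KernelInfo &k) {
                     return k.inputs == node_inputs && k.outputs == node_outputs;
                   });
      std::printf("%zu of %zu candidates survive\n", filtered.size(), candidates.size());
    }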
  • [Activity Experience] mindspore\mindspore\ccsrc\backend\kernel_compiler\hccl\hccl_kern
    const std::vector<size_t> &HcclKernel::GetInputSizeList() const {
      size_t size = 0;
      if (!input_size_list_.empty()) {  // already computed: return the cached list
        return input_size_list_;
      }
      for (ulong i = 0; i < hccl_data_type_list_.size(); ++i) {
        if (!HcomUtil::GetHcclOpSize(hccl_data_type_list_[i], hccl_kernel_input_shape_list_[i], &size)) {
          MS_LOG(ERROR) << "GetHcclOpInputSize failed";
        }
        input_size_list_.push_back(size);
      }
      return input_size_list_;
    }

    const std::vector<size_t> &HcclKernel::GetOutputSizeList() const {
      auto anf_node = anf_node_.lock();
      if (!anf_node) {  // the weak pointer to the node has expired
        MS_LOG(EXCEPTION) << "anf_node pointer is expired.";
      }
      size_t size = 0;
      if (!output_size_list_.empty()) {  // return the cached list
        return output_size_list_;
      }
      auto cnode = anf_node->cast<CNodePtr>();
      auto op_name = AnfAlgo::GetCNodeName(cnode);
      int64_t rank_size = 1;
      if (AnfAlgo::HasNodeAttr(kAttrRankSize, cnode)) {
        rank_size = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrRankSize);
      }
      int64_t fusion = 0;
      if (AnfAlgo::HasNodeAttr(kAttrFusion, cnode)) {
        fusion = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrFusion);
      }
      // fused AllGather/ReduceScatter produce more outputs than data types
      ulong loop_size = hccl_data_type_list_.size();
      if (AnfAlgo::GetInputTensorNum(anf_node) > 1 && op_name == kAllGatherOpName && fusion >= 1) {
        loop_size *= rank_size;
      }
      if (op_name == kReduceScatterOpName && fusion >= 1) {
        loop_size = AnfAlgo::GetOutputTensorNum(anf_node);
      }
      for (ulong i = 0; i < loop_size; ++i) {
        if (!HcomUtil::GetHcclOpSize(hccl_data_type_list_[0], hccl_kernel_output_shape_list_[i], &size)) {
          MS_LOG(ERROR) << "GetHcclOpOutputSize failed";
        }
        output_size_list_.push_back(size);
      }
      return output_size_list_;
    }

    const std::vector<size_t> &HcclKernel::GetWorkspaceSizeList() const {
      if (!workspace_size_list_.empty() || hccl_data_type_list_.empty()) {
        return workspace_size_list_;
      }
      workspace_size_list_.emplace_back(hccl::CalcWorkspaceSize(anf_node_.lock(), hccl_data_type_list_[0]));
      return workspace_size_list_;
    }

    std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inputs,
                                                 const std::vector<AddressPtr> &workspace,
                                                 const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
      auto anf_node = anf_node_.lock();
      if (!anf_node) {
        MS_LOG(EXCEPTION) << "anf_node pointer is expired.";  // weak pointer no longer valid
      }
      std::string hccl_type = AnfAlgo::GetCNodeName(anf_node);
      if (hccl_type == kReceive) {  // Receive has no inputs but must have outputs
        if (outputs.empty()) {
          MS_LOG(EXCEPTION) << "Outputs is empty";
        }
      } else if (inputs.empty() || outputs.empty()) {
        MS_LOG(EXCEPTION) << "Inputs or outputs is empty";
      }
      stream_id_ = stream_id;
      void *input_data_addr = nullptr;
      if (hccl_type != kReceive) {
        MS_EXCEPTION_IF_NULL(inputs.at(0));
        input_data_addr = inputs.at(0)->addr;
      }
      MS_EXCEPTION_IF_NULL(outputs.at(0));
      auto output_data_addr = outputs.at(0)->addr;  // address of the first output buffer
      std::vector<uint8_t> private_def;
      HcclDataType data_type = hccl_data_type_list_[0];
      std::vector<hccl::HcclTaskInfo> task_info;
      bool ret = hccl::GenTask(anf_node, data_type, &task_info);
      if (!ret) {
        MS_LOG(EXCEPTION) << "Gen Task for " << anf_node->DebugString() << " failed.";
      }
      std::vector<TaskInfoPtr> results;
      for (auto &task : task_info) {  // wrap each generated task into a TaskInfoPtr
        MS_LOG(INFO) << "HCCL Task : stream_id=" << stream_id << ", count=" << hccl_count_ << ", root_id=" << root_id_
                     << ", op_type=" << static_cast<int>(op_type_) << ", data_type=" << static_cast<int>(data_type)
                     << ", workspace_size=" << task.workspace_size << ", stream_num=" << task.stream_num
                     << ", private_def_size=" << task.private_def.size();
        private_def.resize(task.private_def.size());
        auto sec_ret = memcpy_s(private_def.data(), private_def.size(), task.private_def.data(), task.private_def.size());
        if (sec_ret != 0) {
          MS_LOG(EXCEPTION) << "Set data memcpy_s failed, ret = " << sec_ret;
        }
        void *workspace_addr = nullptr;
        if (task.workspace_size != 0) {
          if (workspace.empty()) {
            MS_LOG(EXCEPTION) << "Workspace size list of " << anf_node->DebugString() << " is empty";
          }
          MS_EXCEPTION_IF_NULL(workspace.at(0));
          workspace_addr = workspace.at(0)->addr;
        }
        results.emplace_back(std::make_shared<HcclTaskInfo>(
          kernel_name_, stream_id, hccl::GetHcclType(anf_node), input_data_addr, output_data_addr, workspace_addr,
          task.workspace_size, task.stream_num, private_def, hccl::GetHcclOpsKernelInfoStore(), hccl_count_, root_id_,
          op_type_, data_type, group_, NeedDump()));
      }
      return results;
    }

    device::DynamicKernelPtr HcclKernel::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
      AddressPtrList inputs;
      AddressPtrList workspaces;
      AddressPtrList outputs;
      device::KernelRuntime::GenLaunchArgs(*this, cnode_ptr, &inputs, &workspaces, &outputs);
      std::string hccl_type = MsOpNameToHcomOpType(AnfAlgo::GetCNodeName(anf_node_.lock()));
      if (inputs.empty()) {
        MS_LOG(EXCEPTION) << "Hccl kernel input is empty";
      }
      if (hccl_data_type_list_.empty()) {
        MS_LOG(EXCEPTION) << "Hccl data type list is empty";
      }
      MS_EXCEPTION_IF_NULL(inputs.at(0));
      auto input_data_addr = inputs.at(0)->addr;    // first input buffer
      MS_EXCEPTION_IF_NULL(outputs.at(0));
      auto output_data_addr = outputs.at(0)->addr;  // first output buffer
      HcclDataType data_type = hccl_data_type_list_[0];
      auto executor = std::make_shared<device::ascend::HcclDynamicKernel>(
        hccl_type, input_data_addr, output_data_addr, hccl_count_, data_type, op_type_, root_id_, stream_ptr, cnode_ptr);
      return executor;
    }
    }  // namespace kernel
    }  // namespace mindspore
  • [Activity Experience] mindspore\mindspore\ccsrc\backend\kernel_compiler\hccl\hccl_kern
    // Project headers and the standard <map>
    #include "backend/kernel_compiler/hccl/hccl_kernel.h"
    #include <map>
    #include "backend/session/anf_runtime_algorithm.h"
    #include "utils/utils.h"
    #include "utils/ms_context.h"
    #include "runtime/device/kernel_runtime.h"
    #include "runtime/device/ascend/executor/hccl_dynamic_kernel.h"
    #include "runtime/hccl_adapter/hccl_adapter.h"

    // using-declarations pull these names into the current scope, so they can be
    // referred to without the full :: qualification.
    using HcclTaskInfoPtr = std::shared_ptr<mindspore::ge::model_runner::HcclTaskInfo>;
    using mindspore::ge::model_runner::HcclTaskInfo;

    namespace {
    // map from MindSpore op names to HCOM/HCCL op type strings
    static std::map<std::string, std::string> kMsOpNameToHcomHcclType = {
      {mindspore::kAllReduceOpName, mindspore::kHcomOpTypeAllReduce},
      {mindspore::kAllGatherOpName, mindspore::kHcomOpTypeAllGather},
      {mindspore::kBroadcastOpName, mindspore::kHcomOpTypeBroadcast},
      {mindspore::kHcomSendOpName, mindspore::kHcomOpTypeSend},
      {mindspore::kReceiveOpName, mindspore::kHcomOpTypeReceive},
      {mindspore::kReduceScatterOpName, mindspore::kHcomOpTypeReduceScatter}};

    std::string MsOpNameToHcomOpType(const std::string &ms_op_type) {
      auto iter = kMsOpNameToHcomHcclType.find(ms_op_type);
      if (iter == kMsOpNameToHcomHcclType.end()) {  // unknown op name
        MS_LOG(EXCEPTION) << "Invalid MsOpType:" << ms_op_type;
      }
      return iter->second;
    }
    }  // namespace

    namespace mindspore {
    namespace kernel {
    void HcclKernelFactory::Register(const std::string &name, HcclKernelCreater &&fun) {
      hcclKernelMap_.emplace(name, std::move(fun));
    }

    std::shared_ptr<HcclKernel> HcclKernelFactory::Get(const std::string &name) {
      const auto &map = Get().hcclKernelMap_;
      auto it = map.find(name);
      if (it != map.end() && it->second) {  // a creator is registered under this name: invoke it
        return (it->second)();
      }
      return nullptr;  // no kernel registered for this name
    }

    HcclKernelFactory &HcclKernelFactory::Get() {
      static HcclKernelFactory _this;  // Meyers singleton
      return _this;
    }

    HcclKernel::HcclKernel() : hccl_count_(0), op_type_(HCCL_REDUCE_SUM), root_id_(0), receive_type_(0) {}

    HcclKernel::~HcclKernel() {
      hccl_kernel_input_shape_list_.clear();
      hccl_kernel_output_shape_list_.clear();
      hccl_data_type_list_.clear();
      hccl_count_ = 0;
      op_type_ = HCCL_REDUCE_SUM;
      root_id_ = 0;
      input_size_list_.clear();
      output_size_list_.clear();
      workspace_size_list_.clear();
    }

    bool HcclKernel::Init(const AnfNodePtr &anf_node) {
      MS_EXCEPTION_IF_NULL(anf_node);
      op_name_ = AnfAlgo::GetCNodeName(anf_node);
      if (op_name_ == kReceive) {
        if (!HcomUtil::GetHcomReceiveType(anf_node, &receive_type_)) {
          MS_LOG(ERROR) << "GetHcomReceiveType fail!";  // could not read the receive type
          return false;
        }
      }
      if (!HcomUtil::GetKernelInputShape(anf_node, &hccl_kernel_input_shape_list_)) {
        MS_LOG(ERROR) << "GetKernelInputShape fail!";
        return false;
      }
      if (!HcomUtil::GetKernelOutputShape(anf_node, &hccl_kernel_output_shape_list_)) {
        MS_LOG(ERROR) << "GetKernelOutputShape fail!";
        return false;
      }
      if (op_name_ == kReceive) {  // Receive derives its data type from the receive_type_ attribute
        auto iter = CONST_OP_HCOM_DATA_TYPE_MAP.find(receive_type_);
        if (iter == CONST_OP_HCOM_DATA_TYPE_MAP.end()) {
          MS_LOG(ERROR) << "HcomDataType cannot support Current Ascend Data Type : " << receive_type_;
          return false;
        }
        hccl_data_type_list_.emplace_back(iter->second);
      } else if (!HcomUtil::GetHcomDataType(anf_node, &hccl_data_type_list_)) {
        MS_LOG(ERROR) << "GetHcomDataType fail!";
        return false;
      }
      // Receive counts elements from its outputs; every other op counts from its inputs
      if (op_name_ == kReceive) {
        if (!HcomUtil::GetHcomCount(anf_node, hccl_data_type_list_, hccl_kernel_output_shape_list_, &hccl_count_)) {
          MS_LOG(ERROR) << "GetHcomCount fail!";
          return false;
        }
      } else {
        if (!HcomUtil::GetHcomCount(anf_node, hccl_data_type_list_, hccl_kernel_input_shape_list_, &hccl_count_)) {
          MS_LOG(ERROR) << "GetHcomCount fail!";
          return false;
        }
      }
      if (op_name_ == kAllReduce || op_name_ == kReduceScatter) {
        if (!HcomUtil::GetHcomOperationType(anf_node, &op_type_)) {
          MS_LOG(ERROR) << "GetHcomOperationType fail!";
          return false;
        }
      }
      if (op_name_ == kBroadcast) {
        if (!HcomUtil::GetHcomRootId(anf_node, &root_id_)) {
          MS_LOG(ERROR) << "GetHcomRootId fail!";
          return false;
        }
      }
      HcomUtil::GetHcomGroup(NOT_NULL(anf_node), NOT_NULL(&group_));
      anf_node_ = anf_node;
      return true;
    }
  • [Activity Experience] mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\mkldnn\con
    /* Copyright 2019 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0. */
    // Project and standard headers
    #include "backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.h"
    #include <string>
    #include <algorithm>
    #include "utils/ms_utils.h"
    #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
    #include "runtime/device/cpu/cpu_device_address.h"

    namespace mindspore {
    namespace kernel {
    void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
      MS_EXCEPTION_IF_NULL(kernel_node);
      std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
      std::vector<size_t> weight_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
      std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
      // both the input and the weight must be 4-D (NCHW)
      if (src_shape.size() != 4 || weight_shape.size() != 4) {
        MS_LOG(EXCEPTION) << "conv2d only support nchw input!";
      }
      std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]});
      size_t group = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, GROUP));
      if (group != 1) {  // grouped convolution
        if (src_shape[1] % group != 0) {  // channels must divide evenly into the groups
          MS_LOG(EXCEPTION) << "conv2d channels should be divided by group!";
        }
        weight_shape.insert(weight_shape.begin(), group);
        weight_shape[1] = weight_shape[1] / group;
      }
      dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape);
      dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape);
      dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape);
      std::vector<int> stride_ori;
      std::vector<int> dilation_ori;
      auto stride_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDE);
      auto dilation_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, DILATION);
      (void)std::transform(stride_me.begin(), stride_me.end(), std::back_inserter(stride_ori),
                           [](const int64_t &value) { return static_cast<int>(value); });
      (void)std::transform(dilation_me.begin(), dilation_me.end(), std::back_inserter(dilation_ori),
                           [](const int64_t &value) { return static_cast<int>(value); });
      if (stride_ori[0] != 1 || stride_ori[1] != 1) {  // stride must be 1 on the N and C axes
        MS_LOG(EXCEPTION) << "conv2d stride only support 1 in N axis and C axis!";
      }
      if (dilation_ori.size() != 4) {
        MS_LOG(EXCEPTION) << "conv2d dilation must be 4d!";
      }
      if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {  // dilation must be 1 on the N and C axes
        MS_LOG(EXCEPTION) << "conv2d dilation only support 1 in N axis and C axis!";
      }
      std::vector<int> stride{stride_ori[2], stride_ori[3]};
      std::vector<int> dilation{dilation_ori[2], dilation_ori[3]};
      dnnl::memory::dims strides{stride_ori[2], stride_ori[3]};
      dnnl::memory::dims dilates{dilation_ori[2] - 1, dilation_ori[3] - 1};  // oneDNN dilation = framework dilation - 1
      std::vector<int> int_padding_l;
      std::vector<int> int_padding_r;
      const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE);
      GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r, dilation);
      if (int_padding_l.size() != 2 || int_padding_r.size() != 2) {
        MS_LOG(EXCEPTION) << "get padding failed";
      }
      dnnl::memory::dims padding_l{int_padding_l[0], int_padding_l[1]};
      dnnl::memory::dims padding_r{int_padding_r[0], int_padding_r[1]};
      dnnl::convolution_forward::desc desc =
        dnnl::convolution_forward::desc(dnnl::prop_kind::forward_training, dnnl::algorithm::convolution_auto, src_desc,
                                        weights_desc, dst_desc, strides, dilates, padding_l, padding_r);
      auto prim_desc = dnnl::convolution_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
      primitive_ = std::make_shared<dnnl::convolution_forward>(prim_desc);
      AddArgument(DNNL_ARG_SRC, src_desc);
      AddArgument(DNNL_ARG_WEIGHTS, weights_desc);
      AddArgument(DNNL_ARG_DST, dst_desc);
    }

    bool Conv2dCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                 const std::vector<kernel::AddressPtr> & /*workspace*/,
                                 const std::vector<kernel::AddressPtr> &outputs) {
      if (inputs.size() < 2 || outputs.empty()) {
        MS_LOG(EXCEPTION) << "error input output size!";
      }
      SetArgumentHandle(DNNL_ARG_SRC, inputs[0]->addr);
      SetArgumentHandle(DNNL_ARG_WEIGHTS, inputs[1]->addr);
      SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
      ExecutePrimitive();
      return true;
    }
    }  // namespace kernel
    }  // namespace mindspore
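    Note the dilates computation above: oneDNN expects the framework dilation minus one. A quick standalone check of that convention and of the resulting output size (the concrete numbers are assumed example values):

    #include <cstdio>

    int main() {
      const int in = 32, kernel = 3, stride = 2, dilation = 2;  // framework-style dilation
      const int pad_l = 2, pad_r = 2;
      const int effective_k = (kernel - 1) * dilation + 1;  // dilated kernel extent: 5
      const int out = (in + pad_l + pad_r - effective_k) / stride + 1;
      // oneDNN would receive dilate = dilation - 1 = 1 for this configuration
      std::printf("dnnl dilate=%d, output=%d\n", dilation - 1, out);  // prints 1 and 16
    }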
  • [Activity Experience] backend/kernel_compiler/cpu/mkldnn/batch_norm_gard_cpu_kernel.cc
    /* Copyright 2021 Huawei Technologies Co., Ltd. Licensed under the Apache License, Version 2.0. */
    // Project headers
    #include "backend/kernel_compiler/cpu/mkldnn/batch_norm_gard_cpu_kernel.h"
    #include <string>
    #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
    #include "runtime/device/cpu/cpu_device_address.h"
    #include "utils/ms_utils.h"

    namespace mindspore {
    namespace kernel {
    void BatchNormGradCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
      CPUKernel::InitInputOutputSize(kernel_node);
      MS_EXCEPTION_IF_NULL(kernel_node);
      size_t type_size = sizeof(float);  // element size in bytes
      std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
      size_t tensor_size = shape[1] * 2 * type_size;  // shape[1] is the channel count C
      input_size_list_.pop_back();
      // a [2, C] workspace to hold the packed scale and bias
      workspace_size_list_.emplace_back(tensor_size);
      // a [2, C] workspace to hold diff_scale and diff_bias
      workspace_size_list_.emplace_back(tensor_size);
    }

    void BatchNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
      MS_EXCEPTION_IF_NULL(kernel_node);
      std::vector<size_t> x_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
      if (x_shape.size() != 4) {  // the input must be 4-D NCHW
        MS_LOG(EXCEPTION) << "Fused batchnorm only support nchw input!";
      }
      batch_size = x_shape[0];
      channel = x_shape[1];
      hw_size = x_shape[2] * x_shape[3];  // spatial size H*W
      nhw_size = x_shape[0] * hw_size;    // N*H*W
      dnnl::memory::desc x_desc = GetDefaultMemDesc(x_shape);
      dnnl::memory::desc scale_bias_desc = GetDefaultMemDesc({2, channel});
      auto epsilon = AnfAlgo::GetNodeAttr<float>(kernel_node, "epsilon");
      auto prop_kind = dnnl::prop_kind::forward_training;
      auto normalization_flags = dnnl::normalization_flags::use_scale_shift;
      // the forward desc is needed as a hint to build the backward primitive desc
      dnnl::batch_normalization_forward::desc desc =
        dnnl::batch_normalization_forward::desc(prop_kind, x_desc, epsilon, normalization_flags);
      auto forward_prim_desc = dnnl::batch_normalization_forward::primitive_desc(desc, MKLKernelEngine::Get().engine());
      // the backward (gradient) desc
      dnnl::batch_normalization_backward::desc backward_desc =
        dnnl::batch_normalization_backward::desc(dnnl::prop_kind::backward, x_desc, x_desc, epsilon, normalization_flags);
      auto backward_prim_desc = dnnl::batch_normalization_backward::primitive_desc(
        backward_desc, MKLKernelEngine::Get().engine(), forward_prim_desc);
      primitive_ = std::make_shared<dnnl::batch_normalization_backward>(backward_prim_desc);
      AddArgument(DNNL_ARG_SRC, x_desc);
      AddArgument(DNNL_ARG_MEAN, forward_prim_desc.mean_desc());
      AddArgument(DNNL_ARG_VARIANCE, forward_prim_desc.variance_desc());
      AddArgument(DNNL_ARG_SCALE_SHIFT, scale_bias_desc);
      AddArgument(DNNL_ARG_WORKSPACE, forward_prim_desc.workspace_desc());
      AddArgument(DNNL_ARG_DST, x_desc);
      AddArgument(DNNL_ARG_DIFF_DST, x_desc);
      AddArgument(DNNL_ARG_DIFF_SRC, x_desc);
      AddArgument(DNNL_ARG_DIFF_SCALE_SHIFT, scale_bias_desc);
    }

    bool BatchNormGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                        const std::vector<kernel::AddressPtr> &workspace,
                                        const std::vector<kernel::AddressPtr> &outputs) {
      if (inputs.size() < 5 || outputs.empty()) {  // need 5 inputs and at least 1 output
        MS_LOG(EXCEPTION) << "Error input output size!";
      }
      // pack scale (inputs[2]) plus a zeroed bias into the [2, C] scale_shift workspace
      auto wksp_in = reinterpret_cast<float *>(workspace[0]->addr);
      auto scale_ret = memcpy_s(wksp_in, workspace[0]->size, inputs[2]->addr, inputs[2]->size);
      auto max_size = workspace[0]->size - inputs[2]->size;
      auto bias_ret = memset_s(wksp_in + (inputs[2]->size / sizeof(float)), max_size, 0., max_size);
      if (scale_ret != 0 || bias_ret != 0) {  // fail if either operation failed
        MS_LOG(EXCEPTION) << "Memcpy_s error.";
        return false;
      }
      SetArgumentHandle(DNNL_ARG_DIFF_DST, inputs[0]->addr);
      SetArgumentHandle(DNNL_ARG_SRC, inputs[1]->addr);
      SetArgumentHandle(DNNL_ARG_MEAN, inputs[3]->addr);
      SetArgumentHandle(DNNL_ARG_VARIANCE, inputs[4]->addr);
      SetArgumentHandle(DNNL_ARG_SCALE_SHIFT, workspace[0]->addr);
      SetArgumentHandle(DNNL_ARG_DIFF_SRC, outputs[0]->addr);
      SetArgumentHandle(DNNL_ARG_DIFF_SCALE_SHIFT, workspace[1]->addr);
      ExecutePrimitive();
      // unpack diff_scale and diff_bias from the [2, C] result workspace
      auto wksp_out = reinterpret_cast<float *>(workspace[1]->addr);
      auto diff_scale_ret = memcpy_s(outputs[1]->addr, outputs[1]->size, wksp_out, inputs[2]->size);
      auto diff_bias_ret =
        memcpy_s(outputs[2]->addr, outputs[2]->size, wksp_out + (outputs[1]->size / sizeof(float)), outputs[2]->size);
      if (diff_scale_ret != 0 || diff_bias_ret != 0) {
        MS_LOG(EXCEPTION) << "Memcpy_s error.";
        return false;
      }
      return true;
    }
    }  // namespace kernel
    }  // namespace mindspore
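    Launch packs scale and a zeroed bias into one [2, C] scale_shift buffer before calling oneDNN, and the backward pass unpacks d_scale and d_bias from the same layout. A sketch of that packing with plain memcpy/memset standing in for the secure memcpy_s/memset_s used above:

    #include <cstdio>
    #include <cstring>
    #include <vector>

    int main() {
      const size_t C = 4;
      std::vector<float> scale = {1, 2, 3, 4};
      std::vector<float> scale_shift(2 * C);
      std::memcpy(scale_shift.data(), scale.data(), C * sizeof(float));  // row 0: scale
      std::memset(scale_shift.data() + C, 0, C * sizeof(float));         // row 1: bias = 0
      for (size_t i = 0; i < 2 * C; ++i) std::printf("%.0f ", scale_shift[i]);  // 1 2 3 4 0 0 0 0
      std::printf("\n");
    }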
  • [Activity Experience] mindspore\mindspore\ccsrc\backend\kernel_compiler\aicpu_kernel_b
```c++
// Set the node outputs.
void SetNodeOutputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(proto);
  MS_EXCEPTION_IF_NULL(anf_node);
  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
  if (output_num == 1 && HasAbstractMonad(anf_node)) {  // a single monad output counts as no real output
    output_num = 0;
  }
  if (output_num == 0) {  // nothing to serialize
    MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have output. ";
    return;
  }
  for (size_t output_index = 0; output_index < output_num; output_index++) {  // serialize each output tensor
    ::mindspore::Tensor *node_outputs = proto->add_outputs();
    MS_EXCEPTION_IF_NULL(node_outputs);
    std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
    mindspore::TensorShape *tensorShape = node_outputs->mutable_tensor_shape();
    MS_EXCEPTION_IF_NULL(tensorShape);
    for (auto item : output_shape) {  // copy each dimension into the proto shape
      mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
      MS_EXCEPTION_IF_NULL(dim);
      dim->set_size((::google::protobuf::int64)item);
    }
    TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
    int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
    node_outputs->set_tensor_type(output_data_type);
    node_outputs->set_mem_device("HBM");
  }
}

void SetNodedefProto(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(proto);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  // Set the op name.
  proto->set_op(op_name);
  // Set the input tensors.
  SetNodeInputs(anf_node, proto);
  // Set the output tensors.
  SetNodeOutputs(anf_node, proto);
  // Set the node attrs.
  SetNodeAttr(anf_node, proto);
}

// Build the serialized NodeDef bytes for the kernel.
bool CreateNodeDefBytes(const std::shared_ptr<AnfNode> &anf_node,
                        const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  MS_EXCEPTION_IF_NULL(anf_node);
  mindspore::NodeDef proto;
  SetNodedefProto(anf_node, &proto);
  std::string nodeDefStr;
  if (!proto.SerializeToString(&nodeDefStr)) {  // serialization failed
    MS_LOG(ERROR) << "Serialize nodeDef to string failed.";
    return false;
  }
  kernel_mod_ptr->SetNodeDef(nodeDefStr);  // hand the bytes to the kernel mod
  return true;
}

uint64_t SetExtInfoShapeType(char *ext_info_buf, uint64_t ext_info_offset, UnknowShapeOpType type) {
  // Record 1: unknown-shape type.
  auto *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
  info->infoType = FWK_ADPT_EXT_SHAPE_TYPE;
  info->infoLen = sizeof(int32_t);
  ext_info_offset += kExtInfoHeadSize;
  auto *shape_type = reinterpret_cast<int32_t *>(ext_info_buf + ext_info_offset);
  *shape_type = type;
  ext_info_offset += info->infoLen;
  return ext_info_offset;
}

uint64_t SetExtInfoInputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
                                  const std::shared_ptr<AnfNode> &anf_node, size_t input_num) {
  // Record 2: input ShapeAndType.
  auto *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
  info->infoType = FWK_ADPT_EXT_INPUT_SHAPE;
  info->infoLen = input_num * sizeof(ShapeAndType);
  ext_info_offset += kExtInfoHeadSize;
  auto *inputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
  for (size_t input_index = 0; input_index < input_num; input_index++) {  // record each input's type and shape
    TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
    std::vector<size_t> input_shape;
    int32_t input_data_type;
    if (input_type == kObjectTypeString) {  // strings are encoded as shape [1, length]
      auto cnode = anf_node->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(cnode);
      auto input_node = cnode->inputs()[input_index + 1];
      auto value_ptr = GetValueNode(input_node);
      auto value = GetValue<std::string>(value_ptr);
      input_shape.push_back(1);
      input_shape.push_back(value.size());
      input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
    } else {
      input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
      input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
    }
    inputs[input_index].type = input_data_type;
    size_t input_shape_index = 0;
    for (; input_shape_index < input_shape.size(); input_shape_index++) {
      inputs[input_index].dims[input_shape_index] = SizeToLong(input_shape[input_shape_index]);
    }
    if (input_shape.size() < kMaxShapeDims) {  // terminate the dim list with a sentinel
      inputs[input_index].dims[input_shape_index] = LLONG_MIN;
    }
  }
  ext_info_offset += info->infoLen;
  return ext_info_offset;
}

uint64_t SetExtInfoOutputShapeType(char *ext_info_buf, uint64_t ext_info_offset,
                                   const std::shared_ptr<AnfNode> &anf_node, size_t output_num) {
  // Record 3: output ShapeAndType.
  auto *info = reinterpret_cast<ExtInfo *>(ext_info_buf + ext_info_offset);
  info->infoType = FWK_ADPT_EXT_OUTPUT_SHAPE;
  info->infoLen = output_num * sizeof(ShapeAndType);
  ext_info_offset += kExtInfoHeadSize;
  auto *outputs = reinterpret_cast<ShapeAndType *>(ext_info_buf + ext_info_offset);
  for (size_t output_index = 0; output_index < output_num; output_index++) {  // record each output's type and shape
    std::vector<size_t> output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, output_index);
    TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, output_index);
    int32_t output_data_type = AicpuOpUtil::MsTypeToProtoType(output_type);
    outputs[output_index].type = output_data_type;
    size_t output_shape_index = 0;
    for (; output_shape_index < output_shape.size(); output_shape_index++) {
      outputs[output_index].dims[output_shape_index] = SizeToLong(output_shape[output_shape_index]);
    }
    if (output_shape_index < kMaxShapeDims) {  // terminate the dim list with a sentinel
      outputs[output_index].dims[output_shape_index] = LLONG_MIN;
    }
  }
  ext_info_offset += info->infoLen;
  return ext_info_offset;
}

// Build the ext-info blob for dynamic-shape kernels.
bool CreateExtInfo(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  if (!anf_node->isa<CNode>()) {
    return true;
  }
  if (!AnfAlgo::IsDynamicShape(anf_node)) {
    return true;
  }
  uint64_t ext_info_head_len = kExtInfoHeadSize;
  std::string ext_info;
  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
  // Record 1: unknown-shape type.
  uint64_t ext_info_len = ext_info.size();
  ext_info_len += ext_info_head_len + sizeof(int32_t);
  // Record 2: input ShapeAndType.
  ext_info_len += ext_info_head_len + input_num * sizeof(ShapeAndType);
  // Record 3: output ShapeAndType.
  ext_info_len += ext_info_head_len + output_num * sizeof(ShapeAndType);
  uint64_t ext_info_offset = ext_info.size();
  ext_info.resize(ext_info_len, 0);
  char *ext_info_buf = ext_info.data();
  UnknowShapeOpType shape_type = UnknowShapeOpType::DEPEND_IN_SHAPE;
  auto op_name = AnfAlgo::GetCNodeName(anf_node);
  if (kComputeDepend.find(op_name) != kComputeDepend.end()) {
    shape_type = UnknowShapeOpType::DEPEND_COMPUTE;
  }
  ext_info_offset = SetExtInfoShapeType(ext_info_buf, ext_info_offset, shape_type);
  ext_info_offset = SetExtInfoInputShapeType(ext_info_buf, ext_info_offset, anf_node, input_num);
  ext_info_offset = SetExtInfoOutputShapeType(ext_info_buf, ext_info_offset, anf_node, output_num);
  MS_LOG(INFO) << "Check ext_info_len:" << ext_info_len << " ext_info_offset:" << ext_info_offset;
  // Attach the ext info to the kernel mod.
  kernel_mod_ptr->SetExtInfo(ext_info);
  return true;
}

// Entry point for building an AICPU op.
KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
  MS_EXCEPTION_IF_NULL(anf_node);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  auto kernel_mod_ptr = std::make_shared<AicpuOpKernelMod>();
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  kernel_mod_ptr->SetAnfNode(anf_node);
  kernel_mod_ptr->SetNodeName(op_name);
  if (!CreateNodeDefBytes(anf_node, kernel_mod_ptr)) {
    MS_LOG(EXCEPTION) << "Create nodeDefBytes failed!";
  }
  if (!CreateExtInfo(anf_node, kernel_mod_ptr)) {  // ext info could not be built
    MS_LOG(EXCEPTION) << "Create ext info failed!";
  }
  if (!SetIOSize(anf_node, kernel_mod_ptr)) {  // input/output size lists could not be set
    MS_LOG(EXCEPTION) << "Set input output size list failed.";
  }
  return kernel_mod_ptr;
}
}  // namespace kernel
}  // namespace mindspore
```
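The three `SetExtInfo*` helpers above all follow the same type-length-value (TLV) pattern: write a fixed-size header describing the record, advance past it, write the payload, and return the new offset, while `CreateExtInfo` pre-computes the total buffer length and fills it in place. Below is a minimal standalone sketch of that pattern; `ExtInfoHead`, `kHeadSize`, and `AppendInt32Record` are illustrative stand-ins, not the framework's real types.

```c++
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Illustrative stand-in for the framework's ExtInfo record header.
struct ExtInfoHead {
  int32_t infoType;  // tag identifying the record
  int32_t infoLen;   // payload length in bytes
};
constexpr size_t kHeadSize = sizeof(ExtInfoHead);

// Append one int32 TLV record and return the new write offset,
// mirroring the offset arithmetic of SetExtInfoShapeType.
size_t AppendInt32Record(std::vector<char> *buf, size_t offset, int32_t tag, int32_t payload) {
  ExtInfoHead head{tag, sizeof(int32_t)};
  std::memcpy(buf->data() + offset, &head, kHeadSize);
  offset += kHeadSize;
  std::memcpy(buf->data() + offset, &payload, sizeof(payload));
  return offset + head.infoLen;
}

int main() {
  // Like CreateExtInfo: pre-compute the total length, then fill in place.
  std::vector<char> buf(kHeadSize + sizeof(int32_t), 0);
  size_t end = AppendInt32Record(&buf, 0, /*tag=*/1, /*payload=*/42);
  std::cout << "buffer " << buf.size() << " bytes, wrote up to offset " << end << "\n";
  return 0;
}
```

Returning the running offset from each helper is what lets `CreateExtInfo` chain the three calls and then assert, via the final log line, that the bytes written match the bytes reserved.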
  • [Hands-on] mindspore\mindspore\ccsrc\backend\kernel_compiler\aicpu_kernel_b
```c++
/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Project header.
#include "backend/kernel_compiler/aicpu/aicpu_kernel_build.h"
// System headers.
#include <google/protobuf/text_format.h>
#include <utility>
#include <string>
#include <vector>
#include <memory>
#include <algorithm>
#include <map>
#include <climits>
// Project headers.
#include "utils/utils.h"
#include "runtime/device/kernel_runtime.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include "proto/tensor.pb.h"
#include "proto/tensor_shape.pb.h"
#include "proto/attr.pb.h"
#include "proto/node_def.pb.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "backend/session/kernel_graph.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/oplib/oplib.h"

namespace mindspore {  // outer namespace
namespace kernel {  // nested namespace
// Alias for the attr-handler function type.
using FNodeAttrHandle = std::function<void(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto)>;

// Compute the byte size of each input.
bool SetIOIputSize(const std::shared_ptr<AnfNode> &anf_node, const size_t &input_num,
                   std::vector<size_t> *input_size_list) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(input_size_list);
  for (size_t i = 0; i < input_num; i++) {  // walk every input
    std::vector<size_t> shape_i = AnfAlgo::GetInputDeviceShape(anf_node, i);
    if (AnfAlgo::GetInputDeviceDataType(anf_node, i) == kObjectTypeString) {  // string input: size is the string length
      if (!anf_node->isa<CNode>()) {
        MS_LOG(EXCEPTION) << "anf_node is not CNode.";
      }
      auto cnode = anf_node->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(cnode);
      if (cnode->inputs().size() < (i + 1)) {
        MS_LOG(ERROR) << "cnode inputs size " << cnode->inputs().size() << " is smaller than " << i + 1;
        return false;
      }
      auto input_node = cnode->inputs()[i + 1];
      MS_EXCEPTION_IF_NULL(input_node);
      if (input_node->isa<ValueNode>()) {  // only a ValueNode carries the literal string
        auto value_ptr = GetValueNode(input_node);
        auto value = GetValue<std::string>(value_ptr);
        input_size_list->push_back(value.size());
      }
    } else {  // ordinary tensor input: element count times element size
      auto type_ptr = TypeIdToType(AnfAlgo::GetInputDeviceDataType(anf_node, i));
      MS_EXCEPTION_IF_NULL(type_ptr);
      int64_t size_i = 1;
      for (size_t j = 0; j < shape_i.size(); j++) {
        size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
      }
      size_t type_byte = GetTypeByte(type_ptr);
      if (type_byte == 0) {
        return false;
      }
      size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
      input_size_list->push_back(LongToSize(size_i));
    }
  }
  return true;
}

// Fill the input/output size lists on the kernel mod.
bool SetIOSize(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  std::vector<size_t> input_size_list;
  std::vector<size_t> output_size_list;
  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
  size_t output_num = AnfAlgo::GetOutputTensorNum(anf_node);
  if (!SetIOIputSize(anf_node, input_num, &input_size_list)) {
    return false;
  }
  kernel_mod_ptr->SetInputSizeList(input_size_list);
  if (output_num == 1 && HasAbstractMonad(anf_node)) {
    output_num = 0;
  }
  for (size_t i = 0; i < output_num; i++) {  // compute each output's byte size
    std::vector<size_t> shape_i = AnfAlgo::GetOutputDeviceShape(anf_node, i);
    TypePtr type_ptr = TypeIdToType(AnfAlgo::GetOutputDeviceDataType(anf_node, i));
    MS_EXCEPTION_IF_NULL(type_ptr);
    int64_t size_i = 1;
    for (size_t j = 0; j < shape_i.size(); j++) {
      size_i = LongMulWithOverflowCheck(size_i, static_cast<int>(shape_i[j]));
    }
    size_t type_byte = GetTypeByte(type_ptr);
    if (type_byte == 0) {
      return false;
    }
    size_i = LongMulWithOverflowCheck(size_i, SizeToInt(type_byte));
    output_size_list.push_back(LongToSize(size_i));
  }
  kernel_mod_ptr->SetOutputSizeList(output_size_list);
  return true;
}

// Parse one attribute value into the proto attr map.
void ParseAttrValue(const std::string &type, const std::string &attr_name, const mindspore::ValuePtr &value,
                    ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr) {
  MS_EXCEPTION_IF_NULL(node_attr);
  MS_EXCEPTION_IF_NULL(value);
  if (type == "int") {
    auto attr_value = static_cast<int>(GetValue<int64_t>(value));
    (*node_attr)[attr_name].set_i(attr_value);
  } else if (type == "str") {
    auto attr_value = GetValue<std::string>(value);
    (*node_attr)[attr_name].set_s(attr_value);
  } else if (type == "bool") {
    auto attr_value = GetValue<bool>(value);
    (*node_attr)[attr_name].set_b(attr_value);
  } else if (type == "float") {
    auto attr_value = GetValue<float>(value);
    (*node_attr)[attr_name].set_f(attr_value);
  } else if (type == "listInt") {  // accept either a scalar Int64 or a list of Int64
    std::vector<int64_t> attr_value;
    auto value_type = value->type();
    MS_EXCEPTION_IF_NULL(value_type);
    auto value_type_str = value_type->ToString();
    if (value_type_str == "Int64") {
      auto data = GetValue<int64_t>(value);
      attr_value.push_back(data);
    } else {
      attr_value = GetValue<std::vector<int64_t>>(value);
    }
    mindspore::AttrValue input_shape_attr;
    mindspore::AttrValue_ArrayValue *input_shape_attr_list = input_shape_attr.mutable_array();
    MS_EXCEPTION_IF_NULL(input_shape_attr_list);
    for (const auto shape : attr_value) {  // range-based for over the list values
      input_shape_attr_list->add_i(shape);
    }
    (*node_attr)[attr_name] = input_shape_attr;
  } else {  // unsupported attr type
    MS_LOG(EXCEPTION) << "type: " << type << " not support";
  }
}

void SetNodeAttr(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(proto);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  if (op_name == kInitDataSetQueue) {
    op_name = kInitData;
  }
  if (op_name == kPrint) {
    return;
  }
  auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, OpImplyType::kAICPU);
  MS_EXCEPTION_IF_NULL(op_info_ptr);
  auto attrs_ptr = op_info_ptr->attrs_ptr();
  auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
  MS_EXCEPTION_IF_NULL(primitive);
  ::google::protobuf::Map<::std::string, ::mindspore::AttrValue> *node_attr = proto->mutable_attrs();
  for (const auto &attr_ptr : attrs_ptr) {
    MS_EXCEPTION_IF_NULL(attr_ptr);
    std::string attr_name = attr_ptr->name();
    auto value = primitive->GetAttr(attr_name);
    if (value != nullptr) {  // map framework attr names onto the AICPU proto names
      if (attr_name == kQueueName || attr_name == kSharedName) {
        attr_name = kChannelName;
      } else if (attr_name == kSeed0) {
        attr_name = kSeed;
      } else if (attr_name == kSeed1) {
        attr_name = kSeed2;
      }
      std::string type = attr_ptr->type();
      ParseAttrValue(type, attr_name, value, node_attr);
    }
  }
}

// Set the node inputs.
void SetNodeInputs(const std::shared_ptr<AnfNode> &anf_node, mindspore::NodeDef *proto) {
  MS_EXCEPTION_IF_NULL(proto);
  MS_EXCEPTION_IF_NULL(anf_node);
  size_t input_num = AnfAlgo::GetInputTensorNum(anf_node);
  if (input_num == 0) {  // nothing to serialize
    MS_LOG(INFO) << "Node [" << AnfAlgo::GetCNodeName(anf_node) << "] does not have input.";
    return;
  }
  for (size_t input_index = 0; input_index < input_num; input_index++) {  // serialize each input tensor
    ::mindspore::Tensor *node_inputs = proto->add_inputs();
    MS_EXCEPTION_IF_NULL(node_inputs);
    TypeId input_type = AnfAlgo::GetInputDeviceDataType(anf_node, input_index);
    std::vector<size_t> input_shape;
    int32_t input_data_type;
    if (input_type == kObjectTypeString) {  // strings are encoded as shape [1, length]
      auto cnode = anf_node->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(cnode);
      auto input_node = cnode->inputs()[input_index + 1];
      auto value_ptr = GetValueNode(input_node);
      auto value = GetValue<std::string>(value_ptr);
      input_shape.push_back(1);
      input_shape.push_back(value.size());
      input_data_type = AicpuOpUtil::MsTypeToProtoType(kTypeUnknown);
    } else {
      input_shape = AnfAlgo::GetInputDeviceShape(anf_node, input_index);
      input_data_type = AicpuOpUtil::MsTypeToProtoType(input_type);
    }
    mindspore::TensorShape *tensorShape = node_inputs->mutable_tensor_shape();
    for (auto item : input_shape) {  // copy each dimension into the proto shape
      mindspore::TensorShape_Dim *dim = tensorShape->add_dim();
      dim->set_size((::google::protobuf::int64)item);
    }
    node_inputs->set_tensor_type(input_data_type);
    node_inputs->set_mem_device("HBM");
  }
}
```
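`SetIOSize` reduces each tensor to a byte count: the product of its dimensions times the element size, with every multiplication routed through `LongMulWithOverflowCheck`. The sketch below replays that computation in a self-contained form; `MulChecked` and `TensorBytes` are hypothetical names standing in for the framework helpers, and `MulChecked` assumes non-negative operands, which holds for sizes.

```c++
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <vector>

// Hypothetical stand-in for LongMulWithOverflowCheck: multiply two
// non-negative sizes and throw instead of silently wrapping.
int64_t MulChecked(int64_t a, int64_t b) {
  if (a != 0 && b > std::numeric_limits<int64_t>::max() / a) {
    throw std::overflow_error("tensor size overflows int64");
  }
  return a * b;
}

// Byte size of a tensor: product of the dims times the element size,
// the same reduction SetIOSize performs per input/output.
size_t TensorBytes(const std::vector<size_t> &shape, size_t type_byte) {
  int64_t size = 1;
  for (size_t dim : shape) {
    size = MulChecked(size, static_cast<int64_t>(dim));
  }
  return static_cast<size_t>(MulChecked(size, static_cast<int64_t>(type_byte)));
}

int main() {
  // A float32 tensor of shape [2, 3, 4] occupies 2 * 3 * 4 * 4 = 96 bytes.
  std::cout << TensorBytes({2, 3, 4}, 4) << "\n";
  return 0;
}
```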
  • [Hands-on] mindspore\lite\src\lite_kernel_util.cc annotation 1
    ** "mindspore\lite\src\lite_kernel_util.cc"注释1** ======================================= ```python /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "src/lite_kernel_util.h" #include #include #include "src/sub_graph_kernel.h" //命名空间内核 namespace mindspore::kernel { using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; //节点图输入节点 std::vector LiteKernelUtil::SubgraphInputNodes(const std::vector &kernels) { std::vector input_nodes;//创建一个装输入节点的容器 for (const auto &kernel : kernels) { // 如果内核没有预内核,内核是图输入,它必须是子图输入 if (kernel->in_kernels().empty() && !kernel->in_tensors().empty()) { if (!lite::IsContain(input_nodes, kernel)) { input_nodes.push_back(kernel);//遍历添加内核 } continue; } auto all_input_tensors = kernel->in_tensors(); // 从输入张量中删除所有常量张量 for (auto iter = all_input_tensors.begin(); iter != all_input_tensors.end();) {//遍历输入tensors if ((*iter)->IsConst()) { iter = all_input_tensors.erase(iter);//删除iter } else { iter++; } } for (const auto &kernel_in_subgraph : kernels) { // 从子图中的内核中删除输入张量 for (const auto *tensor : kernel_in_subgraph->out_tensors()) {//遍历输出Tensors auto ret = std::find(all_input_tensors.begin(), all_input_tensors.end(), tensor);//寻找相应的tensor if (ret != all_input_tensors.end()) {//判断是否包含 all_input_tensors.erase(ret); } } } // 如果某些输入张量不是来自子图中的内核 if (!all_input_tensors.empty()) {// if (!lite::IsContain(input_nodes, kernel)) { input_nodes.push_back(kernel);//添加内核 } } } return input_nodes;//返回输入节点 } //子图输出节点函数 std::vector LiteKernelUtil::SubgraphOutputNodes( const std::vector &kernels) { std::vector output_nodes; // 如果kernel没有post-kernel,kernel是图输出,一定是子图输出 for (const auto &kernel : kernels) {//遍历内核 if (kernel->is_model_output() || (kernel->out_kernels().empty() && !kernel->out_tensors().empty())) {//检查是否为空 if (!lite::IsContain(output_nodes, kernel)) {//判断该节点是否包含该内核 output_nodes.push_back(kernel);//添加内核 } continue; } for (const auto &output : kernel->out_kernels()) {//遍历输出内核 auto out_kernel_in_graph = std::find(kernels.begin(), kernels.end(), output);//寻找内核的位置 if (out_kernel_in_graph == kernels.end()) {//判断是否输出成功 if (!lite::IsContain(output_nodes, kernel)) { output_nodes.push_back(kernel);//遍历添加内核 } break; } } } return output_nodes;//返回输出节点 }
  • [Hands-on] cpu\update_cache_cpu_kernel.cc code annotation
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\update_cache_cpu_kernel.cc code annotation
```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Project headers.
#include "backend/kernel_compiler/cpu/update_cache_cpu_kernel.h"
#include <string>
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {  // outer namespace
namespace kernel {  // nested namespace
// Initialize the kernel.
void UpdateCacheCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);  // the node must not be null
  node_wpt_ = kernel_node;
  input_x_dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);  // dtype of input_x
  indices_dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 1);  // dtype of the indices
  if (input_x_dtype_ == kNumberTypeFloat32 || input_x_dtype_ == kNumberTypeInt32) {
    input_x_dtype_size_ = 4;
  } else if (input_x_dtype_ == kNumberTypeFloat64 || input_x_dtype_ == kNumberTypeInt64) {
    input_x_dtype_size_ = 8;
  } else {  // input_x only supports float32, float64, int32 and int64
    MS_LOG(EXCEPTION) << "input_x dtype only support float32, float64, int32, int64";
  }
}

// Dispatch on the indices dtype.
bool UpdateCacheCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                  const std::vector<kernel::AddressPtr> & /*workspace*/,
                                  const std::vector<kernel::AddressPtr> &outputs) {
  if (indices_dtype_ == kNumberTypeInt32) {
    LaunchKernel<int>(inputs, outputs);
  } else if (indices_dtype_ == kNumberTypeInt64) {
    LaunchKernel<int64_t>(inputs, outputs);
  } else {  // indices only support int32 and int64
    MS_LOG(ERROR) << "indices dtype only support int32, int64";
    return false;
  }
  return true;
}

// Run the kernel.
template <typename T>
void UpdateCacheCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                        const std::vector<kernel::AddressPtr> &outputs) {
  auto node_ = node_wpt_.lock();  // re-acquire the node
  if (!node_) {  // the weak pointer has expired
    MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
  }
  auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 1);
  auto update_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 2);
  batch_size_ = 1;
  for (size_t i = 0; i < indices_shape.size(); ++i) {  // batch size is the product of the indices dims
    batch_size_ *= indices_shape[i];
  }
  MS_LOG(INFO) << "UpdateCache batch_size:" << batch_size_;
  update_size_ = 1;
  for (size_t i = 0; i < update_shape.size(); ++i) {
    update_size_ *= update_shape[i];
  }
  update_length_ = update_shape[1];
  // Type conversions.
  char *input_x = reinterpret_cast<char *>(inputs[0]->addr);
  T *indices = reinterpret_cast<T *>(inputs[1]->addr);
  char *update = reinterpret_cast<char *>(inputs[2]->addr);
  max_num_ = *reinterpret_cast<T *>(inputs[3]->addr);
  size_t one_length_size = input_x_dtype_size_ * update_length_;
  auto max_size = inputs[0]->size;
  for (size_t i = 0; i < batch_size_; ++i) {
    if (indices[i] < 0 || indices[i] >= max_num_) continue;  // skip out-of-range indices
    char *tmp = update + i * one_length_size;
    if (indices[i] * one_length_size + one_length_size <= max_size) {
      int ret = memcpy_s(input_x + indices[i] * one_length_size, max_size - indices[i] * one_length_size, tmp,
                         one_length_size);
      if (ret != 0) {
        MS_LOG(EXCEPTION) << "memcpy_s error, errorno" << ret;
      }
    } else {  // the copy would run past the cache buffer
      MS_LOG(EXCEPTION) << "Memcpy out of size";
    }
  }
}
}  // namespace kernel
}  // namespace mindspore
```
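The kernel's safety hinges on the guard before `memcpy_s`: a row is copied only when `indices[i] * one_length_size + one_length_size <= max_size`, i.e. the whole destination row fits inside the cache buffer. The sketch below emulates that guarded copy without the securec library; `CopyChecked` is a hypothetical stand-in for `memcpy_s` and returns a nonzero value in place of the real error codes.

```c++
#include <cstddef>
#include <cstring>
#include <iostream>

// Hypothetical emulation of the guarded copy: refuse to write when the
// destination window cannot hold the payload.
int CopyChecked(char *dst, size_t dst_max, const char *src, size_t count) {
  if (dst == nullptr || src == nullptr || count > dst_max) {
    return -1;  // stand-in for the securec error codes
  }
  std::memcpy(dst, src, count);
  return 0;
}

int main() {
  char cache[16] = {};
  const char row[4] = {1, 2, 3, 4};
  // indices[i] = 2, one_length_size = 4: bytes [8, 12) fit in the 16-byte cache.
  size_t offset = 2 * sizeof(row);
  int ret = CopyChecked(cache + offset, sizeof(cache) - offset, row, sizeof(row));
  std::cout << (ret == 0 ? "copied" : "rejected") << "\n";
  // indices[i] = 4 would need bytes [16, 20) and is rejected.
  ret = CopyChecked(cache + 16, 0, row, sizeof(row));
  std::cout << (ret == 0 ? "copied" : "rejected") << "\n";
  return 0;
}
```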
  • [Hands-on] cpu\unsorted_segment_sum_cpu_kernel.cc code annotation
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\unsorted_segment_sum_cpu_kernel.cc code annotation
```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Project header.
#include "backend/kernel_compiler/cpu/unsorted_segment_sum_cpu_kernel.h"
// System header.
#include <string>
#include "runtime/device/cpu/cpu_device_address.h"
// Project header.
#include "common/thread_pool.h"

namespace mindspore {  // outer namespace
namespace kernel {  // nested namespace
// Initialize the kernel.
void UnsortedSegmentSumCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);  // the node must not be null
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);  // number of inputs
  if (input_num != 2) {  // the op requires exactly 2 inputs
    MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but UnsortedSegmentSum needs 2 input.";
  }
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != 1) {  // the op produces exactly 1 output
    MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but UnsortedSegmentSum needs 1 output.";
  }
  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);  // data dtype
  segment_ids_dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 1);  // segment-id dtype
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);  // input shape
  auto segment_ids_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
  for (size_t i = 0; i < input_shape.size(); ++i) {
    unit_num_ *= input_shape[i];
    if (i >= segment_ids_shape.size()) {
      input_dim1_ *= input_shape[i];
    }
  }
  output_dim0_ = output_shape[0];
  for (size_t j = 1; j < output_shape.size(); j++) {
    output_dim1_ *= output_shape[j];
  }
}

// Dispatch on the data and segment-id dtypes.
bool UnsortedSegmentSumCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                         const std::vector<kernel::AddressPtr> & /*workspace*/,
                                         const std::vector<kernel::AddressPtr> &outputs) {
  bool ret{true};
  if (dtype_ == kNumberTypeInt32 && segment_ids_dtype_ == kNumberTypeInt32) {
    ret = LaunchKernel<int, int>(inputs, outputs);
  } else if (dtype_ == kNumberTypeFloat32 && segment_ids_dtype_ == kNumberTypeInt32) {
    ret = LaunchKernel<float, int>(inputs, outputs);
  } else if (dtype_ == kNumberTypeInt32 && segment_ids_dtype_ == kNumberTypeInt64) {
    ret = LaunchKernel<int, int64_t>(inputs, outputs);
  } else if (dtype_ == kNumberTypeFloat32 && segment_ids_dtype_ == kNumberTypeInt64) {
    ret = LaunchKernel<float, int64_t>(inputs, outputs);
  } else {  // only input_x int32/float32 with indices int32/int64 are supported
    MS_LOG(ERROR) << "Only support input_x int32 and float32, indices int32 and int64";
    return false;
  }
  return ret;
}

// Run the kernel.
template <typename S, typename T>
bool UnsortedSegmentSumCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                               const std::vector<kernel::AddressPtr> &outputs) {
  // Type conversions.
  S *input_addr = reinterpret_cast<S *>(inputs[0]->addr);
  T *indices_addr = reinterpret_cast<T *>(inputs[1]->addr);
  S *output_addr = reinterpret_cast<S *>(outputs[0]->addr);
  auto ret = memset_s(output_addr, outputs[0]->size, 0, outputs[0]->size);
  if (ret != EOK) {  // zeroing the output buffer failed
    MS_LOG(ERROR) << "Output buff memset fail. ret:" << ret;
    return false;
  }
  for (size_t i = 0; i < unit_num_; ++i) {
    size_t j = i / input_dim1_;
    size_t k = i % input_dim1_;
    T index = indices_addr[j];
    if (index < 0 || index >= SizeToInt(output_dim0_)) {  // skip out-of-range segment ids
      continue;
    }
    size_t output_index = index * output_dim1_ + k;
    output_addr[output_index] += input_addr[i];
  }
  return true;
}
}  // namespace kernel
}  // namespace mindspore
```
  • [Hands-on] cpu\unpack_cpu_kernel.cc code annotation
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\unpack_cpu_kernel.cc code annotation
```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Project headers.
#include "backend/kernel_compiler/cpu/unpack_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {  // outer namespace
namespace kernel {  // nested namespace
// Initialize the kernel.
template <typename T>
void UnpackCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  CheckParam(kernel_node);
  int64_t axis_tmp = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "axis");
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);  // shape of the input
  if (axis_tmp < 0) {  // normalize a negative axis
    axis_tmp += SizeToLong(input_shape.size());
  }
  size_t axis_ = LongToSize(axis_tmp);
  output_num_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num"));  // number of outputs
  for (size_t i = 0; i < input_shape.size(); i++) {
    input_size_ *= input_shape[i];
    if (i > axis_) {
      dims_after_axis_ *= input_shape[i];
    }
  }
  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);  // input dtype
}

// Initialize the input/output sizes; one workspace slot holds the output pointer table.
template <typename T>
void UnpackCPUKernel<T>::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
  workspace_size_list_.emplace_back(sizeof(T *) * output_num_);
}

template <typename T>
bool UnpackCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                const std::vector<kernel::AddressPtr> &workspace,
                                const std::vector<kernel::AddressPtr> &outputs) {
  LaunchKernel(inputs, workspace, outputs);
  return true;
}

// Run the kernel.
template <typename T>
void UnpackCPUKernel<T>::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
                                      const std::vector<kernel::AddressPtr> &workspace,
                                      const std::vector<kernel::AddressPtr> &outputs) {
  input_ = reinterpret_cast<T *>(inputs[0]->addr);  // type conversion
  MS_EXCEPTION_IF_NULL(input_);  // the input must not be null
  outputs_host_ = reinterpret_cast<T **>(workspace[0]->addr);
  MS_EXCEPTION_IF_NULL(outputs_host_);  // the output pointer table must not be null
  for (size_t i = 0; i < outputs.size(); i++) {
    outputs_host_[i] = reinterpret_cast<T *>(outputs[i]->addr);
    MS_EXCEPTION_IF_NULL(outputs_host_[i]);  // each output buffer must not be null
  }
  size_t number_of_reset = output_num_ * dims_after_axis_;
  auto task = [this, number_of_reset](const size_t start, const size_t end) {
    for (size_t i = start; i < end; ++i) {
      size_t output_index = (i / dims_after_axis_) % output_num_;
      size_t tensor_index = i / number_of_reset * dims_after_axis_ + i % dims_after_axis_;
      outputs_host_[output_index][tensor_index] = input_[i];
    }
  };
  CPUKernelUtils::ParallelFor(task, input_size_);
}

// Check the parameters.
template <typename T>
void UnpackCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);  // the node must not be null
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 1) {  // the op requires exactly 1 input
    MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but UnpackCPUKernel needs 1 input.";
  }
}
}  // namespace kernel
}  // namespace mindspore
```
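The heart of the Unpack kernel is the index arithmetic inside the parallel task: flat input element `i` goes to output `(i / dims_after_axis) % output_num`, at position `i / (output_num * dims_after_axis) * dims_after_axis + i % dims_after_axis`. The standalone sketch below replays that mapping sequentially, without `ParallelFor`; `Unpack` here is a hypothetical helper, not the kernel API.

```c++
#include <cstddef>
#include <iostream>
#include <vector>

// Replays the Unpack kernel's index mapping over a flat input.
void Unpack(const std::vector<int> &input, size_t output_num, size_t dims_after_axis,
            std::vector<std::vector<int>> *outputs) {
  outputs->assign(output_num, std::vector<int>(input.size() / output_num));
  size_t stride = output_num * dims_after_axis;  // the kernel's number_of_reset
  for (size_t i = 0; i < input.size(); ++i) {
    size_t output_index = (i / dims_after_axis) % output_num;
    size_t tensor_index = i / stride * dims_after_axis + i % dims_after_axis;
    (*outputs)[output_index][tensor_index] = input[i];
  }
}

int main() {
  // Unpack a 2x2 matrix along axis 0: output_num = 2, dims_after_axis = 2.
  std::vector<std::vector<int>> outs;
  Unpack({1, 2, 3, 4}, 2, 2, &outs);
  for (const auto &o : outs) {  // prints: "1 2" then "3 4"
    for (int v : o) std::cout << v << " ";
    std::cout << "\n";
  }
  return 0;
}
```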