-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\unique_with_pad_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //检查内核数据类型 bool UniqueWithPadCPUKernel::Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) { if (dtype_ == kNumberTypeInt32) { UniqueCPUKernel::LaunchKernel(inputs, workspace, outputs); PadOutput(inputs, outputs); } else if (dtype_ == kNumberTypeInt64) { UniqueCPUKernel::LaunchKernel(inputs, workspace, outputs); PadOutput(inputs, outputs); } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat16) { UniqueCPUKernel::LaunchKernel(inputs, workspace, outputs); PadOutput(inputs, outputs); } else {//没有支持的数据类型 MS_LOG(EXCEPTION) "Not support data type: " dtype_; } return true; } //检查键盘输入输出值是否要求 template void UniqueWithPadCPUKernel::PadOutput(const std::vector &inputs, const std::vector &outputs) { if (inputs.size() 2) {//输入值必须大于1 MS_LOG(EXCEPTION) "Input size should be large than 1"; } if (outputs.size() 1) {//输出值个数必须大于0 MS_LOG(EXCEPTION) "Output size should be large than 0"; } //类型转换 T pad_num = *reinterpret_cast(inputs[1]->addr); T *out = reinterpret_cast(outputs[0]->addr); for (size_t i = output_size_; i input_size_; ++i) { out[i] = pad_num; } } //检查s'g void UniqueWithPadCPUKernel::CheckParam(const CNodePtr &kernel_node) { auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); if (input_shape.size() != 1) {//输入dim是...但UniqueCPU内核只支持1d MS_LOG(EXCEPTION) "Input dims is " input_shape.size() ", but UniqueCPUKernel only support 1d."; } size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 2) {//输入值个数是...但是内核需要2g输入 MS_LOG(EXCEPTION) "Input number is " input_num ", but UniqueCPUKernel needs 2 input."; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 2) {//输出值个数是...但是内核需要2个输出 MS_LOG(EXCEPTION) "Output number is " output_num ", but UniqueCPUKernel needs 2 output."; } } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\unique_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/unique_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 constexpr size_t kBucketSortThreshold = 100000; //初始化内核 void UniqueCPUKernel::InitKernel(const CNodePtr &kernel_node) { node_wpt_ = kernel_node;//获取节点工作空间 CheckParam(kernel_node);//检查参数 auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);//获取输入值形状 input_size_ = input_shape[0]; dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);//获取输据类型 if (AnfAlgo::HasNodeAttr(SORTED, kernel_node)) { sorted_ = AnfAlgo::GetNodeAttr(kernel_node, SORTED); } } //初始化输入输出值大小 void UniqueCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { CPUKernel::InitInputOutputSize(kernel_node);//获取输入值大小 //原地直接创建临时对象 workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t)); workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t)); workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t)); } //检查数据类型 bool UniqueCPUKernel::Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) { if (dtype_ == kNumberTypeInt32) { LaunchKernel(inputs, workspace, outputs); } else if (dtype_ == kNumberTypeInt64) { LaunchKernel(inputs, workspace, outputs); } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat16) { LaunchKernel(inputs, workspace, outputs); } else {//没有支持的数据类型 MS_LOG(EXCEPTION) "Not support type: " dtype_; } if (!node_wpt_.expired()) { auto node_ = node_wpt_.lock(); if (!node_) {//节点工作空间失效 MS_LOG(EXCEPTION) "node_wpt_ is expired."; } std::vector out_shape; out_shape.emplace_back(output_size_); std::vector dtypes; size_t output_num = AnfAlgo::GetOutputTensorNum(node_); for (size_t i = 0; i output_num; i++) {//循环遍历原地创建临时对象 dtypes.push_back(AnfAlgo::GetOutputInferDataType(node_, i)); } AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, AnfAlgo::GetOutputInferShape(node_, 1)}, node_.get()); } return true; } //检查内核 template void UniqueCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) { //检查输入输出值个数以及工作空间是否符合要求 if (input_size_ == 0) {//输入值个数等于0时,返回值 return; } if (inputs.size() 1) {//输入大小应该大于0 MS_LOG(EXCEPTION) "Input size should be large than 0"; } if (workspace.size() 3) {//工作区大小应该大于2 MS_LOG(EXCEPTION) "workspace size should be large than 2"; } if (outputs.size() 2) {//输出大小应该大于1 MS_LOG(EXCEPTION) "Output size should be large than 1"; } auto params = std::make_shared>();//获取参数个数 //类型转换 params->input_ = reinterpret_cast(inputs[0]->addr); params->input_idx_ = reinterpret_cast(workspace[0]->addr); params->workspace_ = reinterpret_cast(workspace[1]->addr); params->workspace_idx_ = reinterpret_cast(workspace[2]->addr); params->output_ = reinterpret_cast(outputs[0]->addr); params->inverse_idx_ = reinterpret_cast(outputs[1]->addr); params->input_size_ = input_size_; params->output_size_ = 0; params->thread_num_ = common::ThreadPool::GetInstance().GetSyncRunThreadNum();//获取运行相乘数 if (sorted_) { params->need_sort_ = true; if (input_size_ kBucketSortThreshold) { Unique(params); } else { BucketUnique(params); } } else { params->need_sort_ = false; Unique(params); } output_size_ = params->output_size_; } //检查参数 void UniqueCPUKernel::CheckParam(const CNodePtr &kernel_node) { auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); if (input_shape.size() != 1) {//输入dim是...但UniqueCPU内核只支持1d MS_LOG(EXCEPTION) "Input dims is " input_shape.size() ", but UniqueCPUKernel only support 1d."; } size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 1) {//输入值个数是...但是内核需要一个输入 MS_LOG(EXCEPTION) "Input number is " input_num ", but UniqueCPUKernel needs 1 input."; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 2) {//输出值个数是...但是内核需要两个输出 MS_LOG(EXCEPTION) "Output number is " output_num ", but UniqueCPUKernel needs 2 output."; } } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\transpose_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/transpose_cpu_kernel.h" //导入系统自带的库 #include #include //导入自定义的库 #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //初始化内核 void TransposeCPUFwdKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断内核是否为空 input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);//获取输入值的形状 output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);//获取输出值的形状 auto tmp = AnfAlgo::GetNodeAttr>(kernel_node, "perm");//获取节点地址 axes_ = {tmp.begin(), tmp.end()}; dtype_ = AnfAlgo ::GetPrevNodeOutputDeviceDataType(kernel_node, 0);//获取节点输出值数据类型 if (dtype_ == kTypeUnknown) { dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); } launch_map_[kNumberTypeInt8] = &TransposeCPUFwdKernel::LaunchKernel; launch_map_[kNumberTypeInt16] = &TransposeCPUFwdKernel::LaunchKernel; launch_map_[kNumberTypeInt32] = &TransposeCPUFwdKernel::LaunchKernel; launch_map_[kNumberTypeInt64] = &TransposeCPUFwdKernel::LaunchKernel; launch_map_[kNumberTypeUInt8] = &TransposeCPUFwdKernel::LaunchKernel; launch_map_[kNumberTypeUInt16] = &TransposeCPUFwdKernel::LaunchKernel; launch_map_[kNumberTypeUInt32] = &TransposeCPUFwdKernel::LaunchKernel; launch_map_[kNumberTypeUInt64] = &TransposeCPUFwdKernel::LaunchKernel; launch_map_[kNumberTypeFloat32] = &TransposeCPUFwdKernel::LaunchKernel; launch_map_[kNumberTypeBool] = &TransposeCPUFwdKernel::LaunchKernel; auto iter = launch_map_.find(dtype_);//获取数据类型 if (iter != launch_map_.end()) { launch_func_ = iter->second; } else {//若输入数据类型不在数据类型图中,则记录错误日志:输入数据类型:...Transpose内核CPU不支持此类数据类型 MS_LOG(EXCEPTION) "Input data type: " dtype_ "is not supported for Transpose kernel on CPU."; } } //检查输入输出值 bool TransposeCPUFwdKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { launch_func_(this, inputs, outputs); return true; } //检查内核参数 template void TransposeCPUFwdKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { //类型转换 auto input_addr = reinterpret_cast(inputs[0]->addr);//获取输入值地址并进行类型转换 auto output_addr = reinterpret_cast(outputs[0]->addr);//获取输出值地址并进行类型转换 size_t size = IntToSize(inputs[0]->size / sizeof(T)); TransposeIterator base_iter(output_shape_, axes_, input_shape_); auto task = [&base_iter, input_addr, output_addr](size_t start, size_t end) {//h auto iter = base_iter; iter.SetPos(start); for (size_t i = start; i end; ++i) { output_addr[i] = input_addr[iter.GetPos()]; iter.GenNextPos(); } }; CPUKernelUtils::ParallelFor(task, size); } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\topk_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入系统自带的库 #include #include #include #include //导入自定义的库 #include "backend/kernel_compiler/cpu/topk_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 template //检查内核输入输出值 void TopKCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { if (inputs.size() != 2 || outputs.size() != 2) {//TopK内核需要2个输入和2个输出,但获取输入是... MS_LOG(EXCEPTION) "TopK needs 2 inputs and 2 outputs, but get inputs: " inputs.size() "outputs: " outputs.size(); } if (inputs[0]->size != outer_size_ * inner_size_ * sizeof(T)) {//错误的输入数据类型 MS_LOG(EXCEPTION) "Error input data size!"; } if (inputs[1]->size != sizeof(int)) {//输入值k的数据类型必须为int MS_LOG(EXCEPTION) "Input K must be int!"; } //类型转换 auto input = reinterpret_cast(inputs[0]->addr); int k = reinterpret_cast(inputs[1]->addr)[0]; auto output = reinterpret_cast(outputs[0]->addr); auto indices = reinterpret_cast(outputs[1]->addr); if (k 1) {//输入值k必须大于0 MS_LOG(EXCEPTION) "Input k must > 0!"; } int k_num = std::min(inner_size_, k); if (outputs[0]->size != outer_size_ * k_num * sizeof(T)) {//错误的输出值数据类型大小 MS_LOG(EXCEPTION) "Error output data size!"; } for (size_t i = 0; i outer_size_; ++i) { std::vector idx(inner_size_); auto base_input = i * inner_size_; std::iota(idx.begin(), idx.end(), base_input); std::stable_sort(idx.begin(), idx.end(), [&input](size_t index_1, size_t index_2) { return input[index_1] > input[index_2]; }); auto base_output = i * k_num; if (!sorted_) { std::stable_sort(idx.begin(), idx.begin() + k_num); } for (int j = 0; j k_num; ++j) { indices[base_output + j] = idx[j] - base_input; output[base_output + j] = input[idx[j]]; } } } //初始化内核 void TopKCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); auto x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);//获取输入值参数形状 for (size_t i = 0; i x_shape_.size() - 1; ++i) { outer_size_ *= x_shape_[i]; } inner_size_ = x_shape_[x_shape_.size() - 1];//获取内层大小 sorted_ = AnfAlgo::GetNodeAttr(kernel_node, "sorted");//排序 dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);//获取数据类型 } //j bool TopKCPUKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs) { if (dtype_ == kNumberTypeFloat16) { LaunchKernel(inputs, outputs); } else if (dtype_ == kNumberTypeFloat32) { LaunchKernel(inputs, outputs); } return true; } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\tile_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/tile_cpu_kernel.h" #include #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //初始化内核 void TileCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node); x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);//获取变量的形状 y_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);//获取输出值节点的形状 std::vector multiples_me = AnfAlgo::GetNodeAttr>(kernel_node, "multiples"); (void)std::transform(multiples_me.begin(), multiples_me.end(), std::back_inserter(multiples_), [](const int64_t &value) { return static_cast(value); }); dtype_ = AnfAlgo ::GetPrevNodeOutputDeviceDataType(kernel_node, 0); if (dtype_ == kTypeUnknown) { dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); } //数据类型图 launch_map_[kNumberTypeInt8] = &TileCPUKernel::LaunchKernel; launch_map_[kNumberTypeInt16] = &TileCPUKernel::LaunchKernel; launch_map_[kNumberTypeInt32] = &TileCPUKernel::LaunchKernel; launch_map_[kNumberTypeInt64] = &TileCPUKernel::LaunchKernel; launch_map_[kNumberTypeUInt8] = &TileCPUKernel::LaunchKernel; launch_map_[kNumberTypeUInt16] = &TileCPUKernel::LaunchKernel; launch_map_[kNumberTypeUInt32] = &TileCPUKernel::LaunchKernel; launch_map_[kNumberTypeUInt64] = &TileCPUKernel::LaunchKernel; launch_map_[kNumberTypeFloat32] = &TileCPUKernel::LaunchKernel; launch_map_[kNumberTypeBool] = &TileCPUKernel::LaunchKernel; auto iter = launch_map_.find(dtype_); if (iter != launch_map_.end()) { launch_func_ = iter->second; } else {//输入的数据类型:...CPU内核不支持这样的数据类型 MS_LOG(EXCEPTION) "Input data type: " dtype_ "is not supported for Tile kernel on CPU."; } } //检查输入输出值 bool TileCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { launch_func_(this, inputs, outputs); return true; } //j template void TileRecTask(const T *x, T *y, size_t dim, size_t *offset, std::vector *pos, const std::vector &multiples, const std::vector &cargo_x, const std::vector &cargo_y, const std::vector &x_shape) { if (dim == x_shape.size()) { return; } for (size_t i = 0; i x_shape[dim]; ++i) { (*pos)[dim] = i; if (dim == x_shape.size() - 1) { size_t x_offset = 0; for (size_t j = 0; j (*pos).size(); ++j) { x_offset += (*pos)[j] * cargo_x[j]; } memcpy_s(y + *offset, sizeof(T), x + x_offset, sizeof(T)); *offset += 1; continue; } TileRecTask(x, y, dim + 1, offset, pos, multiples, cargo_x, cargo_y, x_shape); } size_t dim_size = cargo_y[dim] * sizeof(T); for (int m = 0; m multiples[dim] - 1; ++m) { size_t y_offset = *offset - cargo_y[dim]; memcpy_s(y + *offset, dim_size, y + y_offset, dim_size); *offset += cargo_y[dim]; } } //检查内核 template void TileCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { //类型转换 auto x_addr = reinterpret_cast(inputs[0]->addr); auto y_addr = reinterpret_cast(outputs[0]->addr); size_t ones = multiples_.size() - x_shape_.size(); if (ones > 0) { for (size_t i = 0; i ones; ++i) { x_shape_.insert(x_shape_.begin(), 1); } } int d = multiples_.size(); std::vector pos(d, 0); std::vector cargo_x(d, 1); std::vector cargo_y = x_shape_; for (int i = d - 2; i >= 0; --i) {//遍历数组 cargo_x[i] = x_shape_[i + 1] * cargo_x[i + 1]; cargo_y[i] *= cargo_y[i + 1] * multiples_[i + 1]; } size_t offset = 0; TileRecTask(x_addr, y_addr, 0, &offset, &pos, multiples_, cargo_x, cargo_y, x_shape_); } //检查输入输出参数个数 void TileCPUKernel::CheckParam(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 1) {//输入值个数是...但是内核需要的是一个输入 MS_LOG(EXCEPTION) "Input number is " input_num ", but TileCPUKernel needs 1 input."; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 1) {//输出值个数是...但是内核需要的是一个输出 MS_LOG(EXCEPTION) "Output number is " output_num ", but TileCPUKernel needs 1 output."; } } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\sub_and_filter_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/sub_and_filter_cpu_kernel.h" #include <string> #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //初始化内核 void SubAndFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断内核是否为空 node_wpt_ = kernel_node;//获取工作空间 input_x_dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); } //检查数据类型 bool SubAndFilterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> & /*workspace*/, const std::vector<kernel::AddressPtr> &outputs) { if (input_x_dtype_ == kNumberTypeInt32) { LaunchKernel<int>(inputs, outputs); } else if (input_x_dtype_ == kNumberTypeInt64) { LaunchKernel<int64_t>(inputs, outputs); } else {//输入值x只支持int32, int64数据类型 MS_LOG(ERROR) << "input x dtype only support int32, int64"; return false; } return true; } //检查内核 template <typename T> void SubAndFilterCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs) { auto node_ = node_wpt_.lock(); if (!node_) {//节点工作空间失效 MS_LOG(EXCEPTION) << "node_wpt_ is expired."; } auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 0); batch_size_ = 1; for (size_t i = 0; i < indices_shape.size(); ++i) { batch_size_ *= indices_shape<i>; } MS_LOG(INFO) << "SubAndFilter batch_size:" << batch_size_; //类型转换 T *input_x = reinterpret_cast<T *>(inputs[0]->addr); T max_num = *reinterpret_cast<T *>(inputs[1]->addr); T offset = *reinterpret_cast<T *>(inputs[2]->addr); T *filter_res = reinterpret_cast<T *>(outputs[0]->addr); T *filter_idx = reinterpret_cast<T *>(outputs[1]->addr); size_t count = 0; for (size_t i = 0; i < batch_size_; ++i) { T temp = input_x<i> - offset; if (temp < 0 || temp >= max_num) continue; filter_res[count] = temp; filter_idx[count] = i; count++; } MS_LOG(INFO) << "SubAndFilter output count is " << count;//内核输出计数为... std::vector<size_t> out_shape; out_shape.emplace_back(count);//y std::vector<TypeId> dtypes; size_t output_num = AnfAlgo::GetOutputTensorNum(node_); for (size_t i = 0; i < output_num; i++) { dtypes.push_back(AnfAlgo::GetOutputInferDataType(node_, i)); } AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, out_shape}, node_.get()); } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\strided_slice_grad_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的包 #include "backend/kernel_compiler/cpu/strided_slice_grad_cpu_kernel.h" //导入系统自带的库 #include <algorithm> #include <functional> //导入自定义的包 #include "runtime/device/cpu/cpu_device_address.h" #include "nnacl/fp32_grad/strided_slice_grad.h" #include "ir/primitive.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //初始化内核 void StridedSliceGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { // 检查内核参数 param_ = (struct StridedSliceParameter *)malloc(sizeof(struct StridedSliceParameter));//获取参数 output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); switch (dtype_) { case kNumberTypeFloat32: param_->data_type = kDataTypeFloat; break; default: MS_LOG(ERROR) << "Not supported data type: " << dtype_; } std::vector<size_t> input_shape_me = AnfAlgo::GetInputDeviceShape(kernel_node, 0); (void)std::transform(input_shape_me.begin(), input_shape_me.end(), std::back_inserter(input_shape_), [](const int64_t &value) { return static_cast<int>(value); }); param_->num_axes_ = input_shape_me.size(); param_->in_shape_length_ = input_shape_me.size(); std::vector<int64_t> begin_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, BEGIN); (void)std::transform(begin_me.begin(), begin_me.end(), std::back_inserter(begin_), [](const int64_t &value) { return static_cast<int>(value); }); auto prim = AnfAlgo::GetCNodePrimitive(kernel_node); MS_EXCEPTION_IF_NULL(prim); auto strides = prim->GetAttr(STRIDES);//获取步幅大小 std::vector<int64_t> strides_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, STRIDES); std::vector<int64_t> end_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, END); (void)std::transform(strides_me.begin(), strides_me.end(), std::back_inserter(strides_), [](const int64_t &value) { return static_cast<int>(value); }); (void)std::transform(end_me.begin(), end_me.end(), std::back_inserter(end_), [](const int64_t &value) { return static_cast<int>(value); }); if (strides_.size() != end_.size() || strides_.size() != output_shape_.size()) { MS_LOG(EXCEPTION) << "stride|end|input size must be equal"; } ExpandAllMemberDims(); std::copy(input_shape_.begin(), input_shape_.end(), param_->in_shape_); std::copy(begin_.begin(), begin_.end(), param_->begins_); std::copy(strides_.begin(), strides_.end(), param_->strides_); std::copy(end_.begin(), end_.end(), param_->ends_); } //扩大所有的数值 void StridedSliceGradCPUKernel::ExpandAllMemberDims() { auto input_len = input_shape_.size(); if (input_len < DIMENSION_8D) { for (size_t i = 0; i < DIMENSION_8D - input_len; ++i) { input_shape_.insert(input_shape_.begin(), 1); } } auto output_len = output_shape_.size();//获取输出值长度 if (output_len < DIMENSION_8D) { for (size_t i = 0; i < DIMENSION_8D - output_len; ++i) { output_shape_.insert(output_shape_.begin(), 1); begin_.insert(begin_.begin(), 0); strides_.insert(strides_.begin(), 1); end_.insert(end_.begin(), 1); } } param_->num_axes_ = DIMENSION_8D; param_->in_shape_length_ = DIMENSION_8D; for (size_t i = 0; i < DIMENSION_8D; ++i) { if (begin_<i> < 0) { begin_<i> += input_shape_<i>; } if (end_<i> < 0) { end_<i> += input_shape_<i>; } } } //检查数据类型 bool StridedSliceGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> & /*workspace*/, const std::vector<kernel::AddressPtr> &outputs) { bool ret{true}; if (dtype_ == kNumberTypeFloat32) { ret = LaunchKernel<float>(inputs, outputs); } else { MS_LOG(ERROR) << "StridedSliceGrad op only support float32"; return false; } return ret; } //检查内核参数 template <typename T> bool StridedSliceGradCPUKernel::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs) { //类型转换 T *input_addr = reinterpret_cast<T *>(inputs[0]->addr); T *output_addr = reinterpret_cast<T *>(outputs[0]->addr); auto dx = reinterpret_cast<float *>(output_addr); auto dy = reinterpret_cast<float *>(input_addr); auto ElementsNum = std::accumulate(output_shape_.begin(), output_shape_.end(), 1LL, std::multiplies<int>()); std::fill(dx, dx + ElementsNum, 0.f); std::vector<int> output_; (void)std::transform(output_shape_.begin(), output_shape_.end(), std::back_inserter(output_), [](const size_t &value) { return static_cast<int>(value); }); auto ret = DoStridedSliceGrad(dy, dx, output_.data(), param_); free(param_); param_ = NULL; if (ret != EOK) { MS_LOG(ERROR) << "StridedSliceGrad error error_code[" << ret << "]"; return false; } return true; } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\split_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入系统自带的库 #include //导入自定义的包 #include "backend/kernel_compiler/cpu/split_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" #include "common/thread_pool.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 template void SplitCPUKernel::InitKernel(const CNodePtr &kernel_node) { axis_ = AnfAlgo::GetNodeAttr(kernel_node, "axis"); output_num_ = AnfAlgo::GetNodeAttr(kernel_node, "output_num"); input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); CheckParam(kernel_node); Reshape(); } template void SplitCPUKernel::Reshape() {//转换形状 param_ = new SplitParameter();//获取参数 param_->num_split_ = output_num_;//获取输出值 param_->split_dim_ = axis_ >= 0 ? axis_ : input_shape_.size() + axis_; param_->strides_[input_shape_.size() - 1] = 1; for (int i = input_shape_.size() - 2; i >= 0; i--) { param_->strides_[i] = param_->strides_[i + 1] * input_shape_[i + 1]; } param_->split_sizes_ = new int[sizeof(int) * param_->num_split_]; int split_size = input_shape_[param_->split_dim_] / output_num_; for (int i = 0; i param_->num_split_; i++) { param_->split_sizes_[i] = split_size; } } //初始化输入输出大小 template void SplitCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { CPUKernel::InitInputOutputSize(kernel_node); workspace_size_list_.emplace_back((sizeof(T *) * output_num_)); } template bool SplitCPUKernel::Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) { LaunchKernel(inputs, workspace, outputs);//检查内核输入值、工作空间、 输出值、 return true; } //检查分割 template void SplitCPUKernel::LaunchSplit(T *input, T **output, size_t size) { (void)std::transform(input_shape_.begin(), input_shape_.end(), std::back_inserter(input_shape_int_), [](const int &value) { return static_cast(value); }); auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();//获取最大运行线程数 const float block_size = 128.0;//设置常量:模块大小 size_t thread_num = size block_size * max_thread_num ? std::ceil(size / block_size) : max_thread_num; param_->split_count_ = size / (input_shape_[param_->split_dim_] * param_->strides_[param_->split_dim_]); int num_unit = param_->split_count_ * param_->num_split_; int thread_n_stride; if (thread_num != 0) { thread_n_stride = UP_DIV(num_unit, thread_num); } //任务 auto task = [&](size_t start, size_t end) { int task_id = start / (size / thread_num);//获取任务id int thread_offset = task_id * thread_n_stride;//获取线程偏移量 int num_unit_thread = MSMIN(thread_n_stride, num_unit - task_id * thread_n_stride); DoSplit(input, reinterpret_cast(output), &input_shape_int_[0], thread_offset, num_unit_thread, param_, sizeof(T)); }; CPUKernelUtils::ParallelFor(task, size); return; } template void SplitCPUKernel::FreeTmpBuff() { if (param_->split_sizes_ != nullptr) { delete[] param_->split_sizes_; param_->split_sizes_ = nullptr; } if (param_ != nullptr) { delete param_; param_ = nullptr; } return; } //检查内核 template void SplitCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) { //类型转换 T *input = reinterpret_cast(inputs[0]->addr); T **output = reinterpret_cast(workspace[0]->addr); for (size_t i = 0; i outputs.size(); i++) {//循环遍历转换输出值类型 output[i] = reinterpret_cast(outputs[i]->addr); } size_t size = static_cast(inputs[0]->size / sizeof(T)); LaunchSplit(input, output, size); FreeTmpBuff(); return; } //检查参数 template void SplitCPUKernel::CheckParam(const CNodePtr &kernel_node) { auto input_num = AnfAlgo::GetInputTensorNum(kernel_node); int64_t dims = SizeToLong(input_shape_.size()); int64_t output_num = SizeToLong(AnfAlgo::GetOutputTensorNum(kernel_node)); if (input_num != 1) {//如果输入值不等于1,记录错误日志,输入个数是...但是内核需要一个输入 MS_LOG(EXCEPTION) "Input number is " input_num ", but Split needs 1 input."; } if (dims == 0) { MS_LOG(EXCEPTION) "Input dims is " dims ", scalar is not supported."; } if (axis_ -dims || axis_ >= dims) { MS_LOG(EXCEPTION) "Attr axis_ " axis_ " must be in " -dims "~" dims; } if (axis_ 0) { axis_ += SizeToInt(input_shape_.size()); } if (output_num_ > SizeToInt(input_shape_[axis_])) {//输出地址个数是...必须小于输入形状 MS_LOG(EXCEPTION) "Attr output_num " output_num_ " must less than " input_shape_[axis_]; } if (output_num_ != output_num) {//输出个数是...但是需要...g MS_LOG(EXCEPTION) "Output num is " output_num ", but need " output_num_; } } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\slice_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/slice_cpu_kernel.h" //导入系统自带的库 #include #include //导入自定义的库 #include "common/thread_pool.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 int NormalizeBeginPos(int begin_pos, int dim_len) { if (begin_pos 0) { int normal_pos = begin_pos + dim_len; return std::max(normal_pos, 0); } return std::min(begin_pos, dim_len - 1); } //初始化内核 void SliceCPUKernel::InitKernel(const CNodePtr &kernel_node) { static const std::unordered_map type_size_map = {{kNumberTypeBool, sizeof(bool)}, {kNumberTypeInt32, sizeof(int)}, {kNumberTypeFloat32, sizeof(float)}, {kNumberTypeFloat64, sizeof(double)}}; auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);//获取输入形状 if (input_shape.size() > DIMENSION_8D || input_shape.empty()) {//Slice仅支持1D to 8D的参数变量 MS_LOG(EXCEPTION) "Slice only support 1D to 8D input tensor, but got " input_shape.size() "D."; } auto size = AnfAlgo::GetNodeAttr>(kernel_node, SIZE); auto begin = AnfAlgo::GetNodeAttr>(kernel_node, BEGIN); if (begin.size() != input_shape.size() || size.size() != input_shape.size()) { //切片要求起始长度和大小必须等于输入维度 MS_LOG(EXCEPTION) "Slice requires the length of begin and size must be equal to input dimension."; } InitSliceParam(input_shape, begin, size); TypeId dtype = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); auto size_pair = type_size_map.find(dtype); if (size_pair == type_size_map.end()) {//"Slice支持bool, int32, float32和float64输入张量,但得到了... MS_LOG(EXCEPTION) "Slice supports bool, int32, float32 and float64 input tensor, but got " TypeIdToType(dtype)->ToString(); } data_size_ = size_pair->second; } //多线程运行 void SliceCPUKernel::ParallelRun(void *input_addr, void *output_addr, int thread_num) { std::vector tasks; int thread_index = 0;//初始化变量 while (thread_index thread_num) {//若线程数小于支持线程数 auto block = [&, thread_index]() { DoSlice(input_addr, output_addr, &slice_param_, thread_index, data_size_); return common::SUCCESS; }; tasks.emplace_back(block); thread_index++; } common::ThreadPool::GetInstance().SyncRun(tasks); } //初始化内核参数 void SliceCPUKernel::InitSliceParam(const std::vector &input_shape, const std::vector &begin, const std::vector &size) { for (size_t i = 0; i DIMENSION_8D; i++) { if (i input_shape.size()) { int dim_len = SizeToInt(input_shape[i]); int begin_pos = LongToInt(begin[i]); int slice_size = LongToInt(size[i]); if (slice_size = 0) {//Slice要求每个维度的切片大小必须大于0 MS_LOG(EXCEPTION) "Slice requires the each dimension slice size must be greater than 0."; } slice_param_.shape_[i] = dim_len; slice_param_.size_[i] = slice_size; slice_param_.begin_[i] = NormalizeBeginPos(begin_pos, dim_len); int end = slice_param_.begin_[i] + slice_param_.size_[i]; slice_param_.end_[i] = std::min(end, dim_len); } else { slice_param_.shape_[i] = 1; slice_param_.begin_[i] = 0; slice_param_.size_[i] = 1; slice_param_.end_[i] = 1; } } slice_param_.param_length_ = DIMENSION_8D; size_t max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum(); slice_param_.op_parameter_.thread_num_ = std::min(slice_param_.size_[1], SizeToInt(max_thread_num)); } //检查参数 bool SliceCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { if (outputs[0]->size == 0) { return true; } auto input_addr = inputs[0]->addr;//获取输入地址 auto output_addr = outputs[0]->addr;//h int thread_num = slice_param_.op_parameter_.thread_num_; if (parallel_ && thread_num >= 2) { ParallelRun(input_addr, output_addr, thread_num); } else { DoSliceNoParallel(input_addr, output_addr, &slice_param_, data_size_); } return true; } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\sigmoid_cross_entropy_with_logits_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/sigmoid_cross_entropy_with_logits_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //初始化内核 void SigmoidCrossEntropyWithLogitsCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node);//检查内核参数 dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);//获取节点输出数据类型 std::vector x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); for (const uint64_t &d : x_shape) { tensor_size_ *= d; } } //检查内核支持数据类型 bool SigmoidCrossEntropyWithLogitsCPUKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs) { if (dtype_ == kNumberTypeFloat16) { LaunchKernel(inputs, outputs); } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat64) { LaunchKernel(inputs, outputs); } else {//输入数据类型只支持float16、float32、float64 MS_LOG(EXCEPTION) "input dtype only support float16, float32, float64"; } return true; } //检查内核 template void SigmoidCrossEntropyWithLogitsCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { //类型转换 auto logits_addr = reinterpret_cast(inputs[0]->addr);//获取模型地址 auto labels_addr = reinterpret_cast(inputs[1]->addr);//获取标签地址 auto output_addr = reinterpret_cast(outputs[0]->addr);//获取输出值地址 //初始化变量 T zero = (T)0.0; T one = (T)1.0; T two = (T)2.0; for (uint64_t i = 0; i tensor_size_; ++i) {//循环遍历数组 if (logits_addr[i] >= zero) { output_addr[i] = log1p(exp(logits_addr[i] - two * logits_addr[i])) - logits_addr[i] * (labels_addr[i] - one); } else { output_addr[i] = log1p(exp(logits_addr[i])) - logits_addr[i] * labels_addr[i]; } } } //检查参数 void SigmoidCrossEntropyWithLogitsCPUKernel::CheckParam(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 2) {//内核需要两个输入,但是获得输入个数是... MS_LOG(EXCEPTION) "SigmoidCrossEntropyWithLogitsCPUKernel needs 2 inputs, but gets " input_num; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 1) {//内核希望获得一个输出,但是获得输c个数是... MS_LOG(EXCEPTION) "SigmoidCrossEntropyWithLogitsCPUKernel expects 1 output, but gets" output_num; } } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\sgd_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/sgd_cpu_kernel.h" //导入系统自带的库 #include #include namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 namespace {//空间嵌套 //声明常量 constexpr size_t kInputSize = 6;//输入值个数为6 constexpr size_t kOutputSize = 1;//输出值个数为1 } // namespace //初始化内核 template void SGDCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断内核是否为空 dampening_ = AnfAlgo::GetNodeAttr(kernel_node, "dampening"); weight_decay_ = AnfAlgo::GetNodeAttr(kernel_node, "weight_decay"); nesterov_ = AnfAlgo::GetNodeAttr(kernel_node, "nesterov");//标签值 } template void SGDCPUKernel::CheckParam(const std::vector &inputs, const std::vector &outputs) { // 输入值: params, grad, lr, accum, momentum, stat if (inputs.size() != kInputSize) { MS_LOG(EXCEPTION) "Input number is " inputs.size() ", but SGD needs 6 inputs."; } // 输出值: param if (outputs.size() != kOutputSize) { MS_LOG(EXCEPTION) "Output number is " outputs.size() ", but SGD needs 1 outputs."; } } template bool SGDCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { CheckParam(inputs, outputs); //l auto param = reinterpret_cast(inputs[0]->addr); auto grad = reinterpret_cast(inputs[1]->addr); auto lr = reinterpret_cast(inputs[2]->addr); auto accum = reinterpret_cast(inputs[3]->addr); auto momentum = reinterpret_cast(inputs[4]->addr); auto stat = reinterpret_cast(inputs[5]->addr); size_t elem_num = inputs[0]->size / sizeof(float); auto task = [&](size_t start, size_t end) { for (size_t i = start; i end; i++) { T grad_new = grad[i]; if (weight_decay_ > 0) { grad_new += param[i] * static_cast(weight_decay_); } if (momentum[0] > static_cast(0)) { if (stat[i] > static_cast(0)) { accum[i] = grad_new; stat[i] = static_cast(0); } else { accum[i] = accum[i] * momentum[0] + static_cast(1.0 - dampening_) * grad_new; } if (nesterov_) { grad_new += accum[i] * momentum[0]; } else { grad_new = accum[i]; } } param[i] -= lr[0] * grad_new; } }; CPUKernelUtils::ParallelFor(task, elem_num); return true; } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\select_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/select_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //初始化内核 template void SelectCPUKernel::InitKernel(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);//h if (input_num != 3) {//若输入值个数不等于3,记录错误日志:输入个数是...但是Select_Cpu_Kernel需要3个输入值 MS_LOG(EXCEPTION) "Input number is " input_num ", but SelectCpuKernel needs 3 input."; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 1) {//若输入值个数不等于1,记录错误日志:输入个数是...但是Select_Cpu_Kernel需要1个输入值 MS_LOG(EXCEPTION) "Output number is " output_num ", but SelectCpuKernel needs 1 output."; } auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); for (size_t x : shape) { element_num_ *= x; } return; } //检查内核 template bool SelectCPUKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs) { //类型转换 auto input_cond = reinterpret_cast(inputs[0]->addr); auto input_x = reinterpret_cast(inputs[1]->addr);//获取输入参数 auto input_y = reinterpret_cast(inputs[2]->addr);//获取由输出参数得到的输出值 auto output = reinterpret_cast(outputs[0]->addr);//获取输出值 auto task = [=](const size_t start, const size_t end) {//获取任务值 for (size_t pos = start; pos end; pos++) { output[pos] = input_cond[pos] ? input_x[pos] : input_y[pos]; } }; CPUKernelUtils::ParallelFor(task, element_num_); return true; } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\scatter_arithmetic_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/scatter_arithmetic_cpu_kernel.h" //导入系统自带的库 #include #include //导入自定义的库 #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //初始化内核 template void ScatterArithmeticCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node);//检查内核参数 kernel_name_ = AnfAlgo::GetCNodeName(kernel_node);//获取内核名 auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);//获取输入形状 input_size_ = 1;//初始化变量 inner_size_ = 1;//初始化变量 for (size_t i = 1; i input_shape.size(); i++) {//循环遍历数组获取输入形状 inner_size_ *= input_shape[i]; } input_size_ = input_shape[0] * inner_size_; auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); indices_size_ = 1; for (size_t i = 0; i indices_shape.size(); i++) { indices_size_ *= indices_shape[i]; } } //检查输入参数个数 template void ScatterArithmeticCPUKernel::CheckParam(const CNodePtr &kernel_node) const { MS_EXCEPTION_IF_NULL(kernel_node);//判断内核参数是否为空 size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 3) {//若输入值个数不等于3,记录错误日志:输入个数是...但是分散添加需要3个输入值 MS_LOG(EXCEPTION) "Input number is " input_num ", but ScatterAdd needs 3 inputs."; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 1) {//若输入值个数不等于1,记录错误日志:输入个数是...但是分散添加需要1个输入值 MS_LOG(EXCEPTION) "Output number is " output_num ", but ScatterAdd has 1 output."; } } //检查内核参数 template bool ScatterArithmeticCPUKernel::Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) { static const std::map> kScatterArithmeticBinOpFuncMap{{"ScatterAdd", &ScatterArithmeticCPUKernel::ScatterAdd}, {"ScatterSub", &ScatterArithmeticCPUKernel::ScatterSub}, {"ScatterMul", &ScatterArithmeticCPUKernel::ScatterMul}, {"ScatterDiv", &ScatterArithmeticCPUKernel::ScatterDiv}, {"ScatterMax", &ScatterArithmeticCPUKernel::ScatterMax}, {"ScatterMin", &ScatterArithmeticCPUKernel::ScatterMin}, {"ScatterUpdate", &ScatterArithmeticCPUKernel::ScatterUpdate}}; if (kScatterArithmeticBinOpFuncMap.find(kernel_name_) != kScatterArithmeticBinOpFuncMap.end()) { //类型转换 T *input = reinterpret_cast(inputs[0]->addr); int *indices = reinterpret_cast(inputs[1]->addr); T *updates = reinterpret_cast(inputs[2]->addr); T *output = reinterpret_cast(outputs[0]->addr); kScatterArithmeticBinOpFuncMap.at(kernel_name_)(this, input, indices, updates); auto bufferSize = outputs[0]->size; auto ret = memcpy_s(output, bufferSize, input, input_size_ * sizeof(T)); if (ret != EOK) { MS_LOG(EXCEPTION) "Memory copy failed!";//内存复制失败 } } else {//无支持运算符 MS_LOG(EXCEPTION) "Not support operator:" kernel_name_; } return true; } template void ScatterArithmeticCPUKernel::ScatterAdd(T *input, const int *indices, const T *updates) { for (size_t i = 0; i indices_size_; i++) { auto base_index_updates = i * inner_size_; auto base_index_input = indices[i] * inner_size_; for (size_t j = 0; j inner_size_; j++) { input[base_index_input + j] += updates[base_index_updates + j]; } } } //分散添加基线 template void ScatterArithmeticCPUKernel::ScatterSub(T *input, const int *indices, const T *updates) { for (size_t i = 0; i indices_size_; i++) { auto base_index_updates = i * inner_size_; auto base_index_input = indices[i] * inner_size_; for (size_t j = 0; j inner_size_; j++) { input[base_index_input + j] -= updates[base_index_updates + j]; } } } template void ScatterArithmeticCPUKernel::ScatterMul(T *input, const int *indices, const T *updates) { for (size_t i = 0; i indices_size_; i++) { auto base_index_updates = i * inner_size_; auto base_index_input = indices[i] * inner_size_; for (size_t j = 0; j inner_size_; j++) { input[base_index_input + j] *= updates[base_index_updates + j]; } } } //等分模块 template void ScatterArithmeticCPUKernel::ScatterDiv(T *input, const int *indices, const T *updates) { for (size_t i = 0; i indices_size_; i++) { for (size_t j = 0; j inner_size_; j++) { auto dividend = input[indices[i] * inner_size_ + j];//获取被除数 auto divisor = updates[i * inner_size_ + j];//获取除数 if (divisor == 0) { if (dividend == 0) {//被除数和除数都为0时 input[indices[i] * inner_size_ + j] = std::numeric_limits::quiet_NaN(); continue; } if (std::numeric_limits::has_infinity) { input[indices[i] * inner_size_ + j] = dividend > 0 ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); } else { input[indices[i] * inner_size_ + j] = dividend > 0 ? std::numeric_limits::max() : std::numeric_limits::min(); } continue; } input[indices[i] * inner_size_ + j] = dividend / divisor;//计算得最后模块数 } } } //分散模块的最大数 template void ScatterArithmeticCPUKernel::ScatterMax(T *input, const int *indices, const T *updates) { for (size_t i = 0; i indices_size_; i++) {//循环遍历 auto base_index_updates = i * inner_size_;//获取被除数 auto base_index_input = indices[i] * inner_size_;//获取除数 for (size_t j = 0; j inner_size_; j++) { input[base_index_input + j] = input[base_index_input + j] > updates[base_index_updates + j] ? input[base_index_input + j] : updates[base_index_updates + j]; } } } //分散模块的最小数 template void ScatterArithmeticCPUKernel::ScatterMin(T *input, const int *indices, const T *updates) { for (size_t i = 0; i indices_size_; i++) {//循环遍历 auto base_index_updates = i * inner_size_;//获取被除数 auto base_index_input = indices[i] * inner_size_;//获取除数 for (size_t j = 0; j inner_size_; j++) { input[base_index_input + j] = input[base_index_input + j] updates[base_index_updates + j] ? input[base_index_input + j] : updates[base_index_updates + j]; } } } //更新分散m template void ScatterArithmeticCPUKernel::ScatterUpdate(T *input, const int *indices, const T *updates) { for (size_t i = 0; i indices_size_; i++) { auto base_index_updates = i * inner_size_; auto base_index_input = indices[i] * inner_size_; for (size_t j = 0; j inner_size_; j++) { input[base_index_input + j] = updates[base_index_updates + j]; } } } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\rmsprop_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/rmsprop_cpu_kernel.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //初始化内核 void RMSPropCPUKernel::InitKernel(const CNodePtr &kernel_node) { auto node_name = AnfAlgo::GetCNodeName(kernel_node); if (node_name == "ApplyCenteredRMSProp") { use_center_ = true; } if (node_name == "ApplyRMSProp") { decay_ = AnfAlgo::GetNodeAttr(kernel_node, "rho"); momentum_ = AnfAlgo::GetNodeAttr(kernel_node, "momentum"); epsilon_ = AnfAlgo::GetNodeAttr(kernel_node, "epsilon"); } auto input_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); for (auto &dim : input_shape) { size_ *= dim; } } bool RMSPropCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { if (!use_center_) { //类型转换 float *variable = reinterpret_cast(inputs[0]->addr); float *mean_square = reinterpret_cast(inputs[1]->addr); float *moment = reinterpret_cast(inputs[2]->addr); float *learning_rate = reinterpret_cast(inputs[3]->addr); float *gradients = reinterpret_cast(inputs[4]->addr); for (size_t i = 0; i size_; i++) { mean_square[i] += (gradients[i] * gradients[i] - mean_square[i]) * (1.0 - decay_); moment[i] = moment[i] * momentum_ + (gradients[i] * learning_rate[0]) / sqrt(mean_square[i] + epsilon_); variable[i] -= moment[i]; } } else { //类型转换 float *variable = reinterpret_cast(inputs[0]->addr); float *mean_gradients = reinterpret_cast(inputs[1]->addr); float *mean_square = reinterpret_cast(inputs[2]->addr); float *moment = reinterpret_cast(inputs[3]->addr); float *gradients = reinterpret_cast(inputs[4]->addr); float *learning_rate = reinterpret_cast(inputs[5]->addr); float *decay = reinterpret_cast(inputs[6]->addr); float *momentum = reinterpret_cast(inputs[7]->addr); float *epsilon = reinterpret_cast(inputs[8]->addr); for (size_t i = 0; i size_; i++) { mean_square[i] += (gradients[i] * gradients[i] - mean_square[i]) * (1.0 - decay[0]); mean_gradients[i] += (gradients[i] - mean_gradients[i]) * (1.0 - decay[0]); auto denom = (mean_square[i] - mean_gradients[i] * mean_gradients[i]) + epsilon[0];//获取分母项 if (denom > 0) { moment[i] = moment[i] * momentum[0] + (gradients[i] * learning_rate[0]) / sqrt(denom); variable[i] -= moment[i]; } } } return true; } } // namespace kernel } // namespace mindspore ```
-
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\resize_nearest_neighbor_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/resize_nearest_neighbor_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" #include "backend/kernel_compiler/common_utils.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //初始化内核 void ResizeNearestNeighborCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node);//检查内核参数 std::vector input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); std::vector output_size = AnfAlgo::GetNodeAttr>(kernel_node, SIZE); align_corners_ = AnfAlgo::GetNodeAttr(kernel_node, "align_corners"); dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); batch_size_ = input_shape[0]; channel_ = input_shape[1]; in_height_ = input_shape[2]; in_width_ = input_shape[3]; out_height_ = output_size[0]; out_width_ = output_size[1]; height_scale_ = Scaling(in_height_, out_height_, align_corners_); width_scale_ = Scaling(in_width_, out_width_, align_corners_); output_size_ = batch_size_ * channel_ * out_height_ * out_width_; } //检查内核数据类型 bool ResizeNearestNeighborCPUKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs) { if (dtype_ == kNumberTypeFloat16) { LaunchKernel(inputs, outputs); } else if (dtype_ == kNumberTypeFloat32) { LaunchKernel(inputs, outputs); } else if (dtype_ == kNumberTypeFloat64) { LaunchKernel(inputs, outputs); } else if (dtype_ == kNumberTypeInt32) { LaunchKernel(inputs, outputs); } else if (dtype_ == kNumberTypeInt64) { LaunchKernel(inputs, outputs); } return true; } //检查参数 template void ResizeNearestNeighborCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { auto input_addr = reinterpret_cast(inputs[0]->addr);//获取输入值地址 auto output_addr = reinterpret_cast(outputs[0]->addr);//获取输出值地址 if (out_height_ == in_height_ && out_width_ == in_width_) { for (size_t i = 0; i output_size_; ++i) { output_addr[i] = input_addr[i]; } } for (size_t i = 0; i output_size_; ++i) { size_t pos0 = i / (channel_ * out_height_ * out_width_) % batch_size_; size_t pos1 = i / (out_height_ * out_width_) % channel_; size_t pos2 = i / (out_width_) % out_height_; size_t pos3 = i % out_width_; const size_t in_y = std::min((align_corners_) ? static_cast(roundf(pos2 * height_scale_)) : static_cast(floorf(pos2 * height_scale_)), in_height_ - 1); const size_t in_x = std::min((align_corners_) ? static_cast(roundf(pos3 * width_scale_)) : static_cast(floorf(pos3 * width_scale_)), in_width_ - 1); size_t input_pos = pos0 * channel_ * in_height_ * in_width_ + pos1 * in_height_ * in_width_ + in_y * in_width_ + in_x; output_addr[i] = input_addr[input_pos]; } } //j void ResizeNearestNeighborCPUKernel::CheckParam(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 1) {//调整双线性需要一个输入值,但是却获得的输入值个数为... MS_LOG(EXCEPTION) "ResizeBilinear needs 1 inputs, but gets " input_num; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 1) {//调整双线性需要一个输出值,但是却获得的输出值个数为... MS_LOG(EXCEPTION) "ResizeBilinear expects 1 output, but gets" output_num; } } } // namespace kernel } // namespace mindspore ```
上滑加载中
推荐直播
-
DTT年度收官盛典:华为开发者空间大咖汇,共探云端开发创新
2025/01/08 周三 16:30-18:00
Yawei 华为云开发工具和效率首席专家 Edwin 华为开发者空间产品总监
数字化转型进程持续加速,驱动着技术革新发展,华为开发者空间如何巧妙整合鸿蒙、昇腾、鲲鹏等核心资源,打破平台间的壁垒,实现跨平台协同?在科技迅猛发展的今天,开发者们如何迅速把握机遇,实现高效、创新的技术突破?DTT 年度收官盛典,将与大家共同探索华为开发者空间的创新奥秘。
去报名 -
GaussDB应用实战:手把手带你写SQL
2025/01/09 周四 16:00-18:00
Steven 华为云学堂技术讲师
本期直播将围绕数据库中常用的数据类型、数据库对象、系统函数及操作符等内容展开介绍,帮助初学者掌握SQL入门级的基础语法。同时在线手把手教你写好SQL。
去报名
热门标签