kernel_标签_开发者

博客(90)
视频(0)
论坛(168)
云声(0)
代码示例(0)

[活动体验] cpu\resize_bilinear_cpu_kernel.cc代码标注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\resize_bilinear_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/resize_bilinear_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" #include "backend/kernel_compiler/common_utils.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //初始化内核 void ResizeBilinearCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node);//检查内核参数 shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);//获取节点原始输出形状 size_ = AnfAlgo::GetNodeAttr>(kernel_node, SIZE);//获取节点大小 align_corners_ = AnfAlgo::GetNodeAttr(kernel_node, "align_corners"); dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);//获取数据类型 size_t in_height = shape_[2]; size_t in_width = shape_[3]; size_t out_height = size_[0]; size_t out_width = size_[1]; height_scale = Scaling(in_height, out_height, align_corners_); width_scale = Scaling(in_width, out_width, align_corners_); } //检查内核数据类型 bool ResizeBilinearCPUKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs) { if (dtype_ == kNumberTypeFloat16) { LaunchKernel(inputs, outputs); } else if (dtype_ == kNumberTypeFloat32) { LaunchKernel(inputs, outputs); } return true; } //检查内核 template void ResizeBilinearCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { auto input_addr = 
reinterpret_cast(inputs[0]->addr); auto output_addr = reinterpret_cast(outputs[0]->addr); size_t batch_size = shape_[0];//获取批处理大小 size_t channel = shape_[1]; size_t in_height = shape_[2]; size_t in_width = shape_[3]; size_t out_height = size_[0]; size_t out_width = size_[1]; size_t out_hw_size = out_height * out_width; size_t in_hw_size = in_height * in_width; size_t bhwc_size = in_hw_size * channel * batch_size; if (out_height == in_height && out_width == in_width) { for (size_t i = 0; i bhwc_size; ++i) { output_addr[i] = static_cast(input_addr[i]); } } std::vector ys(out_height + 1); std::vector xs(out_width + 1); ComputeInterpolationWeights(out_height, in_height, height_scale, ys.data()); ComputeInterpolationWeights(out_width, in_width, width_scale, xs.data()); for (size_t b = 0; b batch_size; ++b) { for (size_t c = 0; c channel; ++c) { for (size_t h = 0; h out_height; ++h) { const T1 *ys_input_lower_ptr = input_addr + ys[h].lower * in_width; const T1 *ys_input_upper_ptr = input_addr + ys[h].upper * in_width; const T2 ys_lerp = T2(ys[h].lerp); for (size_t w = 0; w out_width; ++w) { const size_t xs_lower = xs[w].lower; const size_t xs_upper = xs[w].upper; const T2 xs_lerp = T2(xs[w].lerp); const T2 top_left(ys_input_lower_ptr[xs_lower]); const T2 top_right(ys_input_lower_ptr[xs_upper]); const T2 bottom_left(ys_input_upper_ptr[xs_lower]); const T2 bottom_right(ys_input_upper_ptr[xs_upper]); output_addr[h * out_width + w] = ComputeLerp(top_left, top_right, bottom_left, bottom_right, xs_lerp, ys_lerp); } } output_addr += out_hw_size; input_addr += in_hw_size; } } } //检查参数 void ResizeBilinearCPUKernel::CheckParam(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 1) {//调整双线性需要一个输入值，但是却获得的输入值个数为... MS_LOG(EXCEPTION) "ResizeBilinear needs 1 inputs, but gets " input_num; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 1) {//调整双线性需要一个输出值，但是却获得的输出值个数为... 
MS_LOG(EXCEPTION) "ResizeBilinear expects 1 output, but gets" output_num; } } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于2021-11-01 19:43:03 2021-11-01 19:43:03 最后回复一鲸落 2021-11-01 19:43:03
316 0

kernel
[活动体验] cpu\reduce_cpu_kernel.cc代码标注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\reduce_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/reduce_cpu_kernel.h" //导入系统自带的库 #include #include #include #include namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 template //初始化内核 void ReduceCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断内核是否为空 input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);//获取输出形状 auto axis_addr = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr(AXIS);//获取节点原始地址 if (axis_addr->isa() || axis_addr->isa()) { axis_ = AnfAlgo::GetNodeAttr>(kernel_node, AXIS); } else if (axis_addr->isa()) { axis_.emplace_back(AnfAlgo::GetNodeAttr(kernel_node, AXIS)); } else { MS_LOG(EXCEPTION) "Attribute is invalid";//属性是无效的 } int dimension = input_shape_.size(); std::transform(axis_.begin(), axis_.end(), axis_.begin(), [dimension](const auto &a) { return a 0 ? 
dimension + a : a; }); sort(axis_.begin(), axis_.end()); // 删除复制行 auto last = std::unique(axis_.begin(), axis_.end()); axis_.erase(last, axis_.end()); auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); //根据不同内核名执行相应的操作 if constexpr (std::is_same::value) { if (kernel_name == "ReduceAll") { reduce_type_ = kReduceAll; reduce_func_ = [](const T *input, size_t pos, T *out) { *out &= input[pos]; }; } else if (kernel_name == "ReduceAny") { reduce_type_ = kReduceAny; reduce_func_ = [](const T *input, size_t pos, T *out) { *out |= input[pos]; }; } else {//内核名称为false，不支持删除操作 MS_LOG(EXCEPTION) "Unsupported reduce operation: " kernel_name_ " for bool."; } } else { if (kernel_name == "ReduceMax") { reduce_type_ = kReduceMax; reduce_func_ = [](const T *input, size_t pos, T *out) { *out = std::max(input[pos], *out); }; } else if (kernel_name == "ReduceMin") { reduce_type_ = kReduceMin; reduce_func_ = [](const T *input, size_t pos, T *out) { *out = std::min(input[pos], *out); }; } else if (kernel_name == "ReduceSum") { reduce_type_ = kReduceSum; reduce_func_ = [](const T *input, size_t pos, T *out) { *out += input[pos]; }; } else if (kernel_name == "ReduceMean") { reduce_type_ = kReduceMean; reduce_func_ = [](const T *input, size_t pos, T *out) { *out += input[pos]; }; } else { MS_LOG(EXCEPTION) "Unsupported reduce operation: " kernel_name; } } } //检查内核 template bool ReduceCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspaces*/, const std::vector &outputs) { size_t input_size = inputs[0]->size / sizeof(T); //类型转换 auto input_addr = reinterpret_cast(inputs[0]->addr); auto output_addr = reinterpret_cast(outputs[0]->addr); if (axis_.empty() || input_shape_.empty() || input_shape_.size() == 1) { // Get one ret *output_addr = input_addr[0]; for (size_t i = 1; i input_size; ++i) { reduce_func_(input_addr, i, output_addr); } if (reduce_type_ == kReduceMean) { *output_addr /= input_size; } } else { // 计算转换行 int dimension = input_shape_.size(); size_t stride = 1; 
std::vector axes(input_shape_.size()); size_t j = 0; size_t k = 0; for (int i = 0; i dimension; ++i) { if (j == axis_.size() || i != axis_[j]) { axes[k] = i; ++k; } else { stride *= input_shape_[i]; ++j; } } for (auto &it : axis_) { axes[k] = it; ++k; } // Calculate transpose shape std::vector transpose_shape(input_shape_.size()); for (int i = 0; i dimension; ++i) { transpose_shape[i] = input_shape_[axes[i]]; } size_t output_size = outputs[0]->size / sizeof(T); TransposeIterator base_iter(std::move(transpose_shape), std::move(axes), input_shape_); auto task = [this, &base_iter, input_addr, output_addr, stride](size_t start, size_t end) { auto iter = base_iter; iter.SetPos(start * stride); for (size_t i = start; i end; ++i) { output_addr[i] = input_addr[iter.GetPos()]; iter.GenNextPos(); for (size_t j = 1; j stride; ++j) { reduce_func_(input_addr, iter.GetPos(), &output_addr[i]); iter.GenNextPos(); } if (reduce_type_ == kReduceMean) { output_addr[i] /= stride; } } }; CPUKernelUtils::ParallelFor(task, output_size); } return true; } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于2021-11-01 19:42:22 2021-11-01 19:42:22 最后回复一鲸落 2021-11-01 19:42:22
224 0

kernel
[活动体验] cpu\random_cpu_kernel.cc代码标注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\random_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入系统自带的库 #include #include //导入自定义的库 #include "common/thread_pool.h" #include "runtime/device/cpu/cpu_device_address.h" #include "backend/kernel_compiler/cpu/random_cpu_kernel.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //标准正太分布 void StandardNormal(float *output, std::normal_distribution distribution, std::default_random_engine random_generator, size_t start, size_t end) { for (size_t i = start; i end; i++) { output[i] = distribution(random_generator); } } //检查是否为标准正态分布 void LaunchStandardNormal(int seed, int seed2, const std::vector &outputs) { unsigned int RNG_seed; std::random_device rd; if (seed2 != 0) { RNG_seed = IntToUint(seed2); } else if (seed != 0) { RNG_seed = IntToUint(seed); } else { RNG_seed = rd(); } auto output = reinterpret_cast(outputs[0]->addr);//类型转换 size_t lens = outputs[0]->size / sizeof(float);//计算每行输出字节数 std::normal_distribution distribution; auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();//获取同时运行最大线程数 const float block_size = 128.0;//获取模块大小 size_t thread_num = lens block_size * max_thread_num ? std::ceil(lens / block_size) : max_thread_num; std::vector tasks; size_t start = 0; size_t once_compute_size = (lens + thread_num - 1) / thread_num;//输出一次计算大小 while (start lens) { size_t end = (start + once_compute_size) > lens ? 
lens : (start + once_compute_size); std::default_random_engine random_generator(++RNG_seed); auto block = [&, start, end]() { StandardNormal(output, distribution, random_generator, start, end); return common::SUCCESS; }; tasks.emplace_back(block); start += once_compute_size; } common::ThreadPool::GetInstance().SyncRun(tasks); } //初始化内核 void RandomCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断内核是否为空 std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); auto iter = kRandomOpTypeMap.find(kernel_name);// if (iter == kRandomOpTypeMap.end()) {//错误日志：不支持随机操作 MS_LOG(EXCEPTION) "Random operation " kernel_name " is not supported."; } else { random_op_type_ = iter->second; } size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);//获取输入张量个数 //判断输入个数是否符合要求 if ((random_op_type_ == RANDOM_OP_NORMAL) && input_num != 1) {//输入张量个数是...但仅需要的一个张量 MS_LOG(EXCEPTION) "Input number is " input_num ", but random op needs 1 input."; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 1) {//输出张量个数是...但仅需要的一个输出 MS_LOG(EXCEPTION) "Output number is " output_num ", but random op needs 1 output."; } seed_ = LongToInt(GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed"))); seed2_ = LongToInt(GetValue(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("seed2"))); } //检查内核类型 bool RandomCPUKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs) { switch (random_op_type_) { case RANDOM_OP_NORMAL: { LaunchStandardNormal(seed_, seed2_, outputs); break; } default: {//随机操作的类型s MS_LOG(EXCEPTION) "Random operation " random_op_type_ " is not supported."; } } return true; } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于2021-11-01 19:41:39 2021-11-01 19:41:39 最后回复一鲸落 2021-11-01 19:41:39
323 0

kernel
[活动体验] cpu\pad_and_shift_cpu_kernel.cc代码标注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\pad_and_shift_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/pad_and_shift_cpu_kernel.h" //导入系统自带的库 #include //导入自定义的库 #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 //初始化内核 void PadAndShiftCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断内核是否为空 node_wpt_ = kernel_node;// input_x_dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);//获取输入数据类型 type_size_ = GetTypeByte(TypeIdToType(input_x_dtype_));//获取数据类型大小 auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); batch_size_ = 1;//初始化变量 for (size_t i = 0; i indices_shape.size(); ++i) { batch_size_ *= indices_shape[i]; } MS_LOG(INFO) "PadAndShift batch_size:" batch_size_; auto cum_sum_arr_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); if (cum_sum_arr_shape.size() != 1) { MS_LOG(ERROR) "The shape of cum_sum_arr must be 1."; } cum_sum_size_ = cum_sum_arr_shape[0]; } //检查输入参数数据大小 bool PadAndShiftCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { if (input_x_dtype_ == kNumberTypeInt32) { LaunchKernel(inputs, outputs); } else if (input_x_dtype_ == kNumberTypeInt64) { LaunchKernel(inputs, outputs); } else { MS_LOG(ERROR) "Dtype of input_x only support 
int32, int64"; return false; } return true; } //检查内核 template void PadAndShiftCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) //转换参数类型 T *input_x = reinterpret_cast(inputs[0]->addr); T *cum_sum_arr = reinterpret_cast(inputs[1]->addr); T shift_idx = *reinterpret_cast(inputs[2]->addr); T *output = reinterpret_cast(outputs[0]->addr); if (shift_idx >= static_cast(cum_sum_size_)) {//移位索引必须小于累积大小 MS_LOG(EXCEPTION) "Shift index must small than cumsum size."; } size_t output_size = cum_sum_arr[cum_sum_size_ - 1]; T shift_size = cum_sum_arr[shift_idx]; T valid_size = cum_sum_arr[shift_idx + 1] - shift_size; int ret = memset_s(output, outputs[0]->size, -1, type_size_ * output_size); if (ret != 0) { MS_LOG(EXCEPTION) "memset_s error, errorno" ret; } ret = memcpy_s(output + shift_size, valid_size * type_size_, input_x, valid_size * type_size_); if (ret != 0) { MS_LOG(EXCEPTION) "memcpy_s error, errorno" ret; } std::vector out_shape; out_shape.emplace_back(output_size); std::vector dtypes; auto node_ = node_wpt_.lock(); if (!node_) { MS_LOG(EXCEPTION) "node_wpt_ is expired.";//node_wpt_s } auto output_nums = AnfAlgo::GetOutputTensorNum(node_); for (size_t i = 0; i output_nums; i++) { dtypes.push_back(AnfAlgo::GetOutputInferDataType(node_, i)); } AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape}, node_.get()); } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于2021-11-01 19:40:56 2021-11-01 19:40:56 最后回复一鲸落 2021-11-01 19:40:56
316 0

kernel
[活动体验] cpu\pack_cpu_kernel.cc代码标注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\pack_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/pack_cpu_kernel.h" //导入系统自带的库 #include #include namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 template PackCpuFwdKernel::PackCpuFwdKernel() : axis_(0), input_num_(1), output_size_(0), dims_behind_axis_(1), inputs_host_(nullptr) {} template void PackCpuFwdKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断内核是否为空 axis_ = AnfAlgo::GetNodeAttr(kernel_node, AXIS);//获取行节点地址 input_num_ = AnfAlgo::GetInputTensorNum(kernel_node);//获取输入张量大小 if (axis_ 0) { auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); axis_ += (SizeToInt(input_shape.size()) + 1); } //dim >= axis时计算元素 auto first_input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); for (size_t i = IntToSize(axis_); i first_input_shape.size(); i++) { dims_behind_axis_ *= first_input_shape[i]; } auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); output_size_ = 1; for (size_t i = 0; i output_shape.size(); i++) { output_size_ *= output_shape[i]; } } template bool PackCpuFwdKernel::Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) { //检查输出值参数 if (!CheckParam(outputs)) { return false; } auto output = reinterpret_cast(outputs[0]->addr); inputs_host_ = std::make_unique(input_num_); for (size_t 
i = 0; i inputs.size(); i++) { inputs_host_[i] = reinterpret_cast(inputs[i]->addr); } // 多线程方法 size_t input_size = output_size_; size_t max_thread_num = std::max(std::thread::hardware_concurrency(), static_cast(1)); size_t use_thread_num = input_size 128 * max_thread_num ? std::ceil(static_cast(input_size / 128.0)) : max_thread_num; std::vector threads; if (use_thread_num 1) { use_thread_num = 1; } threads.reserve(use_thread_num); size_t start = 0; size_t batch_size = (input_size + use_thread_num - 1) / use_thread_num;//计算批次大小 while (start input_size) { size_t end = (start + batch_size) > input_size ? input_size : (start + batch_size); threads.emplace_back(std::thread(&PackCpuFwdKernel::PackTensor, this, output, start, end)); start += batch_size; } for (auto &it : threads) { it.join(); } return true; } //检查sa template bool PackCpuFwdKernel::CheckParam(const std::vector &outputs) const { if (outputs.size() != 1) { MS_LOG(EXCEPTION) "Output number is " outputs.size() ", but PackGpuFwdKernel needs 1 output."; return false; } return true; } //打包张量 template void PackCpuFwdKernel::PackTensor(T *output, size_t start, size_t end) { for (size_t pos = start; pos end; ++pos) { size_t cur_input_index = pos / dims_behind_axis_ % input_num_; size_t cycle_len = input_num_ * dims_behind_axis_; size_t local_index = pos / cycle_len * dims_behind_axis_ + pos % cycle_len % dims_behind_axis_; output[pos] = inputs_host_[cur_input_index][local_index]; } } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于2021-11-01 19:40:16 2021-11-01 19:40:16 最后回复一鲸落 2021-11-01 19:40:16
352 0

kernel
[活动体验] cpu\one_hot_cpu_kernel.cc代码标注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\one_hot_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的包 #include "backend/kernel_compiler/cpu/one_hot_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 void OneHotCPUKernel::InitKernel(const CNodePtr &kernel_node) {//初始化内核 MS_EXCEPTION_IF_NULL(kernel_node);//判断内核是否为空 auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);//获取输出形状 if (output_shape.size() 2) {//如果输出形状小于2，记录错误日志：无效输出形状大小为... 
MS_LOG(EXCEPTION) "invalid output shape size: " output_shape.size(); } int64_t axis = AnfAlgo::GetNodeAttr(kernel_node, AXIS); if (axis != -1 && LongToSize(axis) >= output_shape.size()) {//若行的长度小于输出形状大小，则记录错误日志;无效行数 MS_LOG(EXCEPTION) "invalid axis: " axis; } if (axis == -1) { axis_ = output_shape.size() - 1; } else { axis_ = LongToSize(axis); } depth_ = output_shape[axis_]; stride_ = 1; for (size_t i = axis_ + 1; i output_shape.size(); ++i) { stride_ *= output_shape[i]; } } //将数据转换为one-hotb bool OneHotCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { if (inputs.size() 3 || outputs.empty()) { MS_LOG(EXCEPTION) "input or output invalid!"; } //转换变量数据类型 auto indices = reinterpret_cast(inputs[0]->addr); auto on_value = reinterpret_cast(inputs[1]->addr)[0]; auto off_value = reinterpret_cast(inputs[2]->addr)[0]; auto output = reinterpret_cast(outputs[0]->addr); size_t elem_num = inputs[0]->size / sizeof(int); for (size_t i = 0; i elem_num; i++) { size_t stride_num = i / stride_; size_t output_index = stride_num * depth_ * stride_ + i % stride_; size_t index = IntToSize(indices[i]); for (size_t j = 0; j depth_; j++) { if (index == j) { output[output_index] = on_value; } else { output[output_index] = off_value; } output_index += stride_; } } return true; } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于2021-11-01 19:39:36 2021-11-01 19:39:36 最后回复一鲸落 2021-11-01 19:39:36
326 0

kernel
[活动体验] cpu\maximum_cpu_kernel.cc代码标注2

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\maximum_cpu_kernel.cc代码标注2 ```c++ template void MaximumCPUKernel::InitTensorBroadcastShape() { if (output_shape_.size() > max_dims) { MS_LOG(EXCEPTION) "Broadcast operation not support dim greater than 7";//变量超过7不支持广播操作 broadcast_input_x_shape_.resize(max_dims, 1); broadcast_input_y_shape_.resize(max_dims, 1); broadcast_output_shape_.resize(max_dims, 1); for (size_t i = 0; i output_shape_.size(); i++) {//广播输出形状 broadcast_output_shape_[i] = output_shape_[i]; } int input_x_dim_offset = output_shape_.size() - input_x_shape_.size(); for (size_t j = 0; j input_x_shape_.size(); j++) {//广播输入形状 broadcast_input_x_shape_[j + input_x_dim_offset] = input_x_shape_[j]; input_x_num_ *= input_x_shape_[j]; } int input_y_dim_offset = output_shape_.size() - input_y_shape_.size(); for (size_t k = 0; k input_y_shape_.size(); k++) { if (need_broadcast_) { broadcast_input_y_shape_[k + input_y_dim_offset] = input_y_shape_[k]; input_y_num_ *= input_y_shape_[k]; } } } //广播比较 template size_t MaximumCPUKernel::Index(const size_t &index, const size_t &dim) { return dim == 1 ? 
0 : index; } //广播算法 template void MaximumCPUKernel::BroadcastArithKernel(const size_t l0, const size_t l1, const size_t l2, const size_t l3, const size_t l4, const size_t l5, const size_t l6, const size_t r0, const size_t r1, const size_t r2, const size_t r3, const size_t r4, const size_t r5, const size_t r6, const size_t d0, const size_t d1, const size_t d2, const size_t d3, const size_t d4, const size_t d5, const size_t d6, const T *input_x, const T *input_y, T *output) { //判断参数是否为空 MS_EXCEPTION_IF_NULL(input_x); MS_EXCEPTION_IF_NULL(input_y); MS_EXCEPTION_IF_NULL(output); for (size_t pos = 0; pos output_num_; pos++) { size_t i = pos / (d1 * d2 * d3 * d4 * d5 * d6) % d0; size_t j = pos / (d2 * d3 * d4 * d5 * d6) % d1; size_t k = pos / (d3 * d4 * d5 * d6) % d2; size_t l = pos / (d4 * d5 * d6) % d3; size_t m = pos / (d5 * d6) % d4; size_t n = pos / d6 % d5; size_t o = pos % d6; size_t l_index = Index(i, l0) * l1 * l2 * l3 * l4 * l5 * l6; l_index += Index(j, l1) * l2 * l3 * l4 * l5 * l6; l_index += Index(k, l2) * l3 * l4 * l5 * l6; l_index += Index(l, l3) * l4 * l5 * l6; l_index += Index(m, l4) * l5 * l6; l_index += Index(n, l5) * l6; l_index += Index(o, l6); size_t r_index = Index(i, r0) * r1 * r2 * r3 * r4 * r5 * r6; r_index += Index(j, r1) * r2 * r3 * r4 * r5 * r6; r_index += Index(k, r2) * r3 * r4 * r5 * r6; r_index += Index(l, r3) * r4 * r5 * r6; r_index += Index(m, r4) * r5 * r6; r_index += Index(n, r5) * r6; r_index += Index(o, r6); output[pos] = MaximumFunc(input_x[l_index], input_y[r_index]); } } template void MaximumCPUKernel::BroadcastArithOneScalarOneTensor(const T *input_x, const T *input_y, T *output) { //判断参数是否为空 MS_EXCEPTION_IF_NULL(input_x); MS_EXCEPTION_IF_NULL(input_y); MS_EXCEPTION_IF_NULL(output); if (input_x_shape_.size() == 0) { for (size_t i = 0; i output_num_; ++i) { output[i] = MaximumFunc(input_x[0], input_y[i]); } } else { for (size_t i = 0; i output_num_; ++i) { output[i] = MaximumFunc(input_x[i], input_y[0]); } } } //广播算数张量 template 
void MaximumCPUKernel::BroadcastArithTensors(const T *input_x, const T *input_y, T *output) { //判断参数sh MS_EXCEPTION_IF_NULL(input_x); MS_EXCEPTION_IF_NULL(input_y); MS_EXCEPTION_IF_NULL(output); for (size_t i = 0; i output_num_; ++i) { output[i] = MaximumFunc(input_x[i], input_y[i]); } } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于2021-11-01 19:39:03 2021-11-01 19:39:03 最后回复一鲸落 2021-11-01 19:39:03
316 0

kernel
[活动体验] cpu\maximum_cpu_kernel.cc代码标注1

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\maximum_cpu_kernel.cc代码标注1 ```c++ ** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/maximum_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间内核 template void MaximumCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node); input_x_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);//获取输入x的形状 input_y_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);//获取输入y的形状 output_shape_ = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);//获取输出的形状 TypeId input_x_dtype = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);//获取输入x的数据类型 TypeId input_y_dtype = AnfAlgo::GetInputDeviceDataType(kernel_node, 1);//获取输入y的数据类型 size_t max_input_shape_size =//获取最大的输入形状大小 input_x_shape_.size() > input_y_shape_.size() ? 
input_x_shape_.size() : input_y_shape_.size(); for (size_t i = 0; i output_shape_.size(); i++) { output_num_ *= output_shape_[i]; } if ((input_x_shape_.size() == 0 && input_y_shape_.size() != 0) || (input_x_shape_.size() != 0 && input_y_shape_.size() == 0)) { InitInputTensorAndScalar(max_input_shape_size); } else if (max_input_shape_size == output_shape_.size() && output_shape_.size() != 0) { InitInputTensors(input_x_dtype, input_y_dtype); } else {//只支持输入两个张量或者一个张量和一个标量 MS_LOG(EXCEPTION) "Only support input two tensors or one tensor and one scalar"; } } //检查参数 template void MaximumCPUKernel::CheckParam(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); //判断输入值数量是否为2，若不是则记录错误日志：输入值数量为...但是 MaximumCPUKernel 需要的输入值数量为2 if (input_num != 2) { MS_LOG(EXCEPTION) "Input number is " input_num ", but MaximumCPUKernel needs 2 input."; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); //判断输出值数量是否为1，若不是则记录错误日志：输入值数量为...但是 MaximumCPUKernel 需要的输出值数量为1个 if (output_num != 1) { MS_LOG(EXCEPTION) "Output number is " output_num ", but MaximumCPUKernel needs 1 output."; } } //初始化张量和标量 template void MaximumCPUKernel::InitInputTensorAndScalar(size_t max_input_shape_size) { if (max_input_shape_size != output_shape_.size()) {//输出张量大小必须等于输入的最大形状大小 MS_LOG(EXCEPTION) "Output tensor size must be equal to the max shape size of inputs"; } need_broadcast_ = false; } template void MaximumCPUKernel::InitInputTensors(TypeId input_x_dtype, TypeId input_y_dtype) { if (input_x_dtype == kNumberTypeBool && input_y_dtype == kNumberTypeBool) { MS_LOG(EXCEPTION) "Input tensor types cannot be both bool"; } //检查形状是否需要广播 need_broadcast_ = IsBroadcast(); if (need_broadcast_) { InitTensorBroadcastShape(); } } template bool MaximumCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { //转换类型 T *input_x_ = reinterpret_cast(inputs[0]->addr); T *input_y_ = reinterpret_cast(inputs[1]->addr); T *output_ = 
reinterpret_cast(outputs[0]->addr); BroadcastArith(input_x_, input_y_, output_); return true; } template void MaximumCPUKernel::BroadcastArith(const T *input_x, const T *input_y, T *output) { //判断参数参数是否为空 MS_EXCEPTION_IF_NULL(input_x); MS_EXCEPTION_IF_NULL(input_y); MS_EXCEPTION_IF_NULL(output); if (need_broadcast_) {//参数需要广播 BroadcastArithKernel(broadcast_input_x_shape_[0], broadcast_input_x_shape_[1], broadcast_input_x_shape_[2], broadcast_input_x_shape_[3], broadcast_input_x_shape_[4], broadcast_input_x_shape_[5], broadcast_input_x_shape_[6], broadcast_input_y_shape_[0], broadcast_input_y_shape_[1], broadcast_input_y_shape_[2], broadcast_input_y_shape_[3], broadcast_input_y_shape_[4], broadcast_input_y_shape_[5], broadcast_input_y_shape_[6], broadcast_output_shape_[0], broadcast_output_shape_[1], broadcast_output_shape_[2], broadcast_output_shape_[3], broadcast_output_shape_[4], broadcast_output_shape_[5], broadcast_output_shape_[6], input_x, input_y, output); } else {//参数不需要广播 if (input_x_shape_.size() == 0 || input_y_shape_.size() == 0) { BroadcastArithOneScalarOneTensor(input_x, input_y, output); } else { BroadcastArithTensors(input_x, input_y, output); } } } //判断数值是否经过广播 template bool MaximumCPUKernel::IsBroadcast() { if (input_x_shape_.size() != input_y_shape_.size()) {//如果输入形状不等于输出形状，则数值经过广播 return true; } for (size_t i = 0; i input_x_shape_.size(); i++) { if (input_x_shape_[i] != input_y_shape_[i]) { return true; } } return false; } ```

一鲸落 发表于2021-11-01 19:38:18 2021-11-01 19:38:18 最后回复一鲸落 2021-11-01 19:38:18
326 0

kernel
[活动体验] cpu\map_cache_idx_cpu_kernel.cc代码标注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\map_cache_idx_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/map_cache_idx_cpu_kernel.h" //导入系统自带的库 #include #include #include //导入自定义的库 #include "runtime/device/cpu/cpu_device_address.h" #include "utils/cache_embedding_hashmap_struct.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 template //压缩数值 int Compress(HashmapEntry *entry_p, const size_t &length, T entry) { T i = (entry + 1) % length, off = 1; int compress_count = 0;//初始化变量 for (; !entry_p[i].IsEmpty(); i = (i + 1) % length, off++) { if (entry_p[i].tag_ > off) { entry_p[entry].key_ = entry_p[i].key_; entry_p[entry].value_ = entry_p[i].value_; entry_p[entry].step_ = entry_p[i].step_; entry_p[entry].tag_ = entry_p[i].tag_ - off; entry_p[i].SetEmpty(); off = 0; entry = i; } compress_count++; } return compress_count; } //更新形状 void UpdateShape(size_t miss_count, const CNodePtr &node_) { std::vector out_shape; out_shape.emplace_back(miss_count); std::vector dtypes; size_t output_num = AnfAlgo::GetOutputTensorNum(node_); for (size_t i = 0; i output_num; i++) {//循环遍历数组原地创建临时对象 dtypes.push_back(AnfAlgo::GetOutputInferDataType(node_, i)); } AnfAlgo::SetOutputInferTypeAndShape(dtypes, {AnfAlgo::GetOutputInferShape(node_, 0), out_shape, out_shape, out_shape}, node_.get()); } //初始化内核 void MapCacheIdxCPUKernel::InitKernel(const CNodePtr 
&kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断内核是否为空 node_wpt_ = kernel_node;//获取节点运行时间 auto hashmap_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);//获取hash图的形状 if (hashmap_shape.size() != 2) { MS_LOG(EXCEPTION) "Dimension of HashMap must be 2, (n, 4)";//HashMap的维数必须是2，(n, 4) } hashmap_length_ = hashmap_shape[0]; if (hashmap_length_ = 0) { MS_LOG(EXCEPTION) "Hashmap length must > 0";//Hash图的长度必须大于0 } dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);//获取数据结构 } //核对数据类型 bool MapCacheIdxCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { if (dtype_ == kNumberTypeInt32) { LaunchKernel(inputs, outputs); } else if (dtype_ == kNumberTypeInt64) { LaunchKernel(inputs, outputs); } else { MS_LOG(ERROR) "Only support int32, int64";//只支持 int32或int64的数据类型 return false; } return true; } //检查内核信息 template void MapCacheIdxCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { auto node_ = node_wpt_.lock();//获取节点个数 auto emb_idx_shape = AnfAlgo::GetPrevNodeOutputInferShape(node_, 1);//获取前一个节点输出信息的形状 batch_size_ = 1;//初始化变量 for (size_t i = 0; i emb_idx_shape.size(); ++i) { batch_size_ *= emb_idx_shape[i]; } HashmapEntry *hashmap = reinterpret_cast *>(inputs[0]->addr); //转换类型 auto input_indices = reinterpret_cast(inputs[1]->addr); T *step_ = reinterpret_cast(inputs[2]->addr); T emb_max_num = *reinterpret_cast(inputs[3]->addr); T offset = *reinterpret_cast(inputs[4]->addr); auto output_cache_idx = reinterpret_cast(outputs[0]->addr); auto output_old_emb_idx = reinterpret_cast(outputs[1]->addr); auto output_miss_emb_idx = reinterpret_cast(outputs[2]->addr); auto output_swap_cache_idx = reinterpret_cast(outputs[3]->addr); std::vector miss_idx; //初始化变量 size_t miss_count = 0; float total_count = 0; int count_size = 0; float hit_count = 0; // 搜索缓存索引 for (size_t i = 0; i batch_size_; ++i) { T key = input_indices[i] - offset; if (key >= emb_max_num || key 0) { 
output_cache_idx[i] = -1; continue; } T tmp_entry = HashFunc(key, hashmap_length_); size_t count = 1; count_size += 1; while ((!hashmap[tmp_entry].IsEmpty() && !hashmap[tmp_entry].IsKey(key))) { tmp_entry = (tmp_entry + 1) % hashmap_length_; if (count > hashmap_length_) { MS_LOG(EXCEPTION) "Hashmap is full, search cache idx failed, please set a larger vocab_cache_size!";//：Hashmap已满，搜索缓存索引失败，请设置更大的缓存词汇大小 } count += 1; } total_count += count; if (hashmap[tmp_entry].IsEmpty()) { miss_idx.emplace_back(i); output_miss_emb_idx[miss_count] = key; output_cache_idx[i] = -1; miss_count++; } else { hit_count += 1; output_cache_idx[i] = hashmap[tmp_entry].value_; hashmap[tmp_entry].step_ = step_[0]; } } if (miss_count != 0) { MS_LOG(INFO) "Miss count: " miss_count; } if (count_size != 0) { MS_LOG(INFO) "Avg search count: " total_count / count_size; MS_LOG(INFO) "Cache hit rate: " hit_count / count_size; } float total_insert_count = 0; float total_delete_count = 0; //交换 hash map for (size_t i = 0; i miss_count; ++i) { T emb_idx = output_miss_emb_idx[i]; T entry = HashFunc(emb_idx, hashmap_length_); size_t tag_count = 1; while (!hashmap[entry].IsEmpty()) { entry = (entry + 1) % hashmap_length_; if (tag_count > hashmap_length_) { MS_LOG(EXCEPTION) "Hashmap is full, insert new key failed, please set a larger vocab_cache_size!";//Hashmap已满，插入新键失败，请设置较大的vocab_cache_size } tag_count++; } hashmap[entry].key_ = emb_idx; hashmap[entry].step_ = step_[0]; hashmap[entry].tag_ = tag_count; T tmp_entry = (entry + 1) % hashmap_length_; size_t delete_count = 1; while (hashmap[tmp_entry].IsEmpty() || hashmap[tmp_entry].IsUsing(step_[0])) { tmp_entry = (tmp_entry + 1) % hashmap_length_; if (delete_count > hashmap_length_) { MS_LOG(EXCEPTION) "Hashmap is full, delete old key failed, please set a larger vocab_cache_size!";//Hashmap已满，删除旧键失败，请设置更大的缓存词汇大小 } delete_count++; } output_swap_cache_idx[i] = hashmap[tmp_entry].value_; output_old_emb_idx[i] = hashmap[tmp_entry].key_; hashmap[entry].value_ 
= output_swap_cache_idx[i]; hashmap[tmp_entry].SetEmpty(); int compress_count = Compress(hashmap, hashmap_length_, tmp_entry); total_delete_count += (compress_count + delete_count);//总共删除文件 total_insert_count += tag_count;//总共插入文件s } if (miss_count != 0) { MS_LOG(INFO) "Insert count: " total_insert_count / miss_count; MS_LOG(INFO) "Delete count: " total_delete_count / miss_count; } step_[0] += 1; for (size_t i = 0; i miss_count; ++i) { output_cache_idx[miss_idx[i]] = output_swap_cache_idx[i]; } UpdateShape(miss_count, node_);//跟新形状 } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于 2021-11-01 19:37:41 最后回复 一鲸落 2021-11-01 19:37:41
224 0

kernel
[活动体验] cpu\layer_norm_grad_cpu_kernel.cc代码标注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\layer_norm_grad_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/layer_norm_grad_cpu_kernel.h" #include "backend/kernel_compiler/common_utils.h" #include "runtime/device/cpu/cpu_device_address.h" #include "common/thread_pool.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 void LayerNormGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {//初始化内核 CheckParam(kernel_node);//检查参数 dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);//获取数据类型 std::vector x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);//获取参数x的形状 auto begin_norm_axis = AnfAlgo::GetNodeAttr(kernel_node, "begin_norm_axis");//获取begin_norm_axis节点地址 auto begin_params_axis = AnfAlgo::GetNodeAttr(kernel_node, "begin_params_axis");//获取begin_params_axis节点地址 if (begin_norm_axis 0) { begin_norm_axis += x_shape.size(); } if (begin_params_axis 0) { begin_params_axis += x_shape.size(); } //计算代码块大小 for (size_t i = 0; i IntToSize(begin_norm_axis); i++) { block_num_ *= x_shape[i]; } for (size_t i = IntToSize(begin_norm_axis); i x_shape.size(); i++) { block_size_ *= x_shape[i]; } //计算参数的数量 for (size_t i = 0; i IntToSize(begin_params_axis); i++) { param_size_ *= x_shape[i]; } for (size_t i = begin_params_axis; i x_shape.size(); i++) { param_num_ *= x_shape[i]; } if (block_num_ = 0 || block_size_ = 0) { MS_LOG(EXCEPTION) 
"LayerNormGradCPUKernel input shape error, input shape: " x_shape; } } //核对数据类型 bool LayerNormGradCPUKernel::Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) { if (dtype_ == kNumberTypeFloat16) { LaunchKernel(inputs, workspace, outputs); } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat64) { LaunchKernel(inputs, workspace, outputs); } else {//输入数据类型只支持float16, float32, float64 MS_LOG(EXCEPTION) "input dtype only support float16, float32, float64"; } return true; } template void LayerNormGradCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) { //转换参数类型 auto x = reinterpret_cast(inputs[0]->addr); auto dy = reinterpret_cast(inputs[1]->addr); auto var = reinterpret_cast(inputs[2]->addr); auto mean = reinterpret_cast(inputs[3]->addr); auto gamma = reinterpret_cast(inputs[4]->addr); auto dx = reinterpret_cast(outputs[0]->addr); auto dg = reinterpret_cast(outputs[1]->addr); auto db = reinterpret_cast(outputs[2]->addr); size_t thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();//获取线程数量 auto thread_num1 = param_num_ thread_num ? param_num_ : thread_num;//线程数量1取thread_num 和 param_num_小的一个 std::vector tasks1; tasks1.reserve(thread_num1); auto thread_num2 = block_num_ thread_num ? 
block_num_ : thread_num;//线程数量1取thread_num 和 block_num_小的一个 std::vector tasks2; tasks2.reserve(thread_num2); auto task1 = [&](size_t start, size_t end) { for (size_t c = 0; c ceil(static_cast(param_num_) / thread_num1); ++c) { if (c * thread_num1 + start >= param_num_) { continue; } size_t param_index = c * thread_num1 + start; T dgamma = (T)0.0;//初始化变量 T dbeta = (T)0.0;//初始化变量 for (size_t j = param_index; j param_size_ * param_num_; j += param_num_) { auto norm_shift = static_cast(j / block_size_); dgamma += dy[j] * (T)std::pow(static_cast(var[norm_shift]) + eps_, -0.5) * (x[j] - mean[norm_shift]); dbeta += dy[j]; } dg[param_index] = dgamma; db[param_index] = dbeta; } }; auto task2 = [&](size_t start, size_t end) { for (size_t c = 0; c ceil(static_cast(block_num_) / thread_num2); ++c) { if (c * thread_num2 + start >= block_num_) { continue; } size_t block_index = c * thread_num2 + start; T sum1 = (T)0.0; T sum2 = (T)0.0; T sum3 = (T)0.0; for (size_t j = block_index * block_size_; j (block_index + 1) * block_size_; ++j) { auto param_shift = j % param_num_; auto norm_shift = static_cast(j / block_size_); auto dxm = x[j] - mean[norm_shift]; auto dyg = dy[j] * gamma[param_shift]; sum1 += (T)(-0.5) * dyg * dxm * (T)std::pow(static_cast(var[norm_shift]) + eps_, -1.5); sum2 += dyg; sum3 += (T)(-2.0) * dxm; } for (size_t j = block_index * block_size_; j (block_index + 1) * block_size_; ++j) { auto param_shift = j % param_num_; auto norm_shift = static_cast(j / block_size_); auto var_sqrt = (T)std::pow(static_cast(var[norm_shift]) + eps_, -0.5); auto dx1 = dy[j] * gamma[param_shift] * var_sqrt; auto dx2 = sum1 * (T)2.0 / block_size_ * (x[j] - mean[norm_shift]); auto dx3 = ((T)(-1.0) * var_sqrt * sum2 + ((T)1.0 / block_size_) * sum1 * sum3) * ((T)1.0 / block_size_); dx[j] = dx1 + dx2 + dx3; } } }; for (size_t i = 0; i thread_num1; ++i) { auto block = [&, i]() { task1(i, i + 1); return common::SUCCESS; }; tasks1.emplace_back(block); } 
common::ThreadPool::GetInstance().SyncRun(tasks1); for (size_t i = 0; i thread_num2; ++i) { auto block = [&, i]() { task2(i, i + 1); return common::SUCCESS; }; tasks2.emplace_back(block);//原地创建一个对象block } common::ThreadPool::GetInstance().SyncRun(tasks2); } void LayerNormGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 5) {//LayerNormCPUKernel需要5个输入，但是获取的输入值数量是. MS_LOG(EXCEPTION) "LayerNormGradCPUKernel needs 5 inputs, but gets " input_num; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 3) {//LayerNormCPUKernel期待3个输出，但是获取的输c值数量是... MS_LOG(EXCEPTION) "LayerNormGradCPUKernel expects 3 output, but gets" output_num; } } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于 2021-11-01 19:37:10 最后回复 一鲸落 2021-11-01 19:37:10
218 0

数据结构 kernel
[活动体验] cpu\layer_norm_cpu_kernel.cc代码标注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\layer_norm_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入系统自带的的库 #include //导入自定义的库 #include "backend/kernel_compiler/cpu/layer_norm_cpu_kernel.h" #include "backend/kernel_compiler/common_utils.h" #include "runtime/device/cpu/cpu_device_address.h" #include "common/thread_pool.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 void LayerNormCPUKernel::InitKernel(const CNodePtr &kernel_node) {//初始化内核 CheckParam(kernel_node);//检查内核参数 dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);//获取节点数据类型 std::vector x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);//获取变量形状 auto begin_norm_axis = AnfAlgo::GetNodeAttr(kernel_node, "begin_norm_axis");//获取节点地址 auto begin_params_axis = AnfAlgo::GetNodeAttr(kernel_node, "begin_params_axis");//获取节点地址 if (begin_norm_axis 0) { begin_norm_axis += x_shape.size(); } if (begin_params_axis 0) { begin_params_axis += x_shape.size(); } //计算代码块大小 for (size_t i = 0; i IntToSize(begin_norm_axis); i++) { block_num_ *= x_shape[i]; } for (size_t i = IntToSize(begin_norm_axis); i x_shape.size(); i++) { block_size_ *= x_shape[i]; } //计算参数的数量 for (size_t i = IntToSize(begin_params_axis); i x_shape.size(); i++) { param_num_ *= x_shape[i]; } if (block_num_ = 0 || block_size_ = 0) { MS_LOG(EXCEPTION) "LayerNormCPUKernel input shape error, input shape: " x_shape; } } //核对数据类型 bool 
LayerNormCPUKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs) { if (dtype_ == kNumberTypeFloat16) { LaunchKernel(inputs, outputs); } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat64) { LaunchKernel(inputs, outputs); } else {//输入数据类型只支持float16, float32, float64 MS_LOG(EXCEPTION) "input dtype only support float16, float32, float64"; } return true; } template void LayerNormCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { size_t f_size = sizeof(T);//T类型所占字节数 if (inputs[1]->size != f_size * param_num_ || inputs[2]->size != f_size * param_num_) { MS_LOG(EXCEPTION) "The product of gamma and beta's shape must be " param_num_;//伽马和β的形状必须是 param_num } if (outputs[1]->size != f_size * block_num_ || outputs[2]->size != f_size * block_num_) { MS_LOG(EXCEPTION) "The product of mean and var's shape must be " block_num_;//均值和var的乘积的形状必须是block_num } //转换参数类型 auto x = reinterpret_cast(inputs[0]->addr); auto gamma = reinterpret_cast(inputs[1]->addr); auto beta = reinterpret_cast(inputs[2]->addr); auto y = reinterpret_cast(outputs[0]->addr); auto mean = reinterpret_cast(outputs[1]->addr); auto var = reinterpret_cast(outputs[2]->addr); size_t thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum(); if (block_num_ thread_num) { thread_num = block_num_; } std::vector tasks; tasks.reserve(thread_num); auto task = [&](size_t start, size_t end) { for (size_t c = 0; c ceil(static_cast(block_num_) / thread_num); ++c) { if (c * thread_num + start >= block_num_) { continue; } size_t i = c * thread_num + start; T sum = (T)0.0; T square_sum = (T)0.0; for (size_t j = i * block_size_; j (i + 1) * block_size_; ++j) { sum += x[j]; square_sum += x[j] * x[j]; } T block_mean = sum / block_size_;//模块均值 T block_var = square_sum / block_size_ - block_mean * block_mean;//模块附加值 for (size_t j = i * block_size_; j (i + 1) * block_size_; ++j) { auto param_shift = j % param_num_; y[j] = (x[j] - block_mean) 
/ (T)std::sqrt(static_cast(block_var) + eps_) * gamma[param_shift] + beta[param_shift]; } mean[i] = block_mean; var[i] = block_var; } }; for (size_t i = 0; i thread_num; ++i) { auto block = [&, i]() { task(i, i + 1); return common::SUCCESS; }; tasks.emplace_back(block); } common::ThreadPool::GetInstance().SyncRun(tasks); } //检查参数 void LayerNormCPUKernel::CheckParam(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != 3) {//LayerNormCPUKernel需要三个输入，但是获取的输入值数量是... MS_LOG(EXCEPTION) "LayerNormCPUKernel needs 3 inputs, but gets " input_num; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 3) {//LayerNormCPUKernel期待三个输出，但是获取的输c值数量是.. MS_LOG(EXCEPTION) "LayerNormCPUKernel expects 3 output, but gets" output_num; } } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于 2021-11-01 19:36:36 最后回复 一鲸落 2021-11-01 19:36:36
337 0

数据结构 kernel
[活动体验] cpu\l2_normalize_cpu_kernel.cc代码评注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\l2_normalize_cpu_kernel.cc代码评注 ```c++ /** * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/l2_normalize_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 template void L2NormalizeCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断内核是否为空 epsilon_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "epsilon"));//获取内核地址 axis_ = LongToInt(AnfAlgo::GetNodeAttr(kernel_node, "axis"));//获取行的数量 input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);//获取输入值的形状 output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);//获取输出值的形状 CheckParam(kernel_node);//检查内核参数 if (axis_ 0) {//如果行数小于0，行的 axis_ += SizeToInt(input_shape_.size()); } } template void L2NormalizeCPUKernel::CalcDenominator(const T *input_addr, const size_t reduce_size, const int dims, std::unique_ptr *denominator_addr) { // 计算转置轴和步幅 size_t stride = 1; std::vector axes(input_shape_.size()); int k = 0; for (int i = 0; i dims; ++i) { if (i != axis_) { axes[k] = i; ++k; } else { stride *= input_shape_[i]; } } axes[k] = axis_; std::vector transpose_shape(input_shape_.size()); for (int i = 0; i dims; ++i) { transpose_shape[i] = input_shape_[axes[i]]; } TransposeIterator tran_base_iter(std::move(transpose_shape), std::move(axes), input_shape_); auto task = [&](size_t start, 
size_t end) { T temp = (T)0.0;//初始化变量 T denominator = (T)0.0;//初始化变量 auto iter = tran_base_iter;//起别名 iter.SetPos(start * stride); for (size_t i = start; i end; ++i) {//循环遍历数组，计算分母大小 denominator = input_addr[iter.GetPos()];//获取分母数 denominator = denominator * denominator; iter.GenNextPos(); for (size_t j = 1; j stride; ++j) {//循环遍历数组 temp = input_addr[iter.GetPos()];//临时值 denominator += temp * temp; iter.GenNextPos(); } denominator = (denominator > epsilon_) ? denominator : epsilon_; (*denominator_addr)[i] = sqrt(denominator); } }; CPUKernelUtils::ParallelFor(task, reduce_size); } template void L2NormalizeCPUKernel::CalcOutput(const T *input_addr, const std::vector reduce_shape, const size_t output_size, T *output_addr, std::unique_ptr const &denominator_addr) { BroadcastIterator broad_base_iter(input_shape_, reduce_shape, output_shape_); auto task = [&](size_t start, size_t end) { auto iter = broad_base_iter; iter.SetPos(start); for (size_t i = start; i end; ++i) { T dividend = input_addr[iter.GetInputPosA()];//获取被除数大小 T divisor = denominator_addr[iter.GetInputPosB()];//获取除数大小 if (divisor == (T)0) { if (dividend == (T)0) {//除数与被除数都为0 output_addr[i] = std::numeric_limits::quiet_NaN(); continue; } if (std::numeric_limits::has_infinity) { output_addr[i] = dividend > (T)0 ? std::numeric_limits::infinity() : -std::numeric_limits::infinity(); } else { output_addr[i] = dividend > (T)0 ? 
std::numeric_limits::max() : std::numeric_limits::min(); } continue; } output_addr[i] = dividend / divisor;//输出值 iter.GenNextPos(); } }; CPUKernelUtils::ParallelFor(task, output_size); } template bool L2NormalizeCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { auto input_addr = reinterpret_cast(inputs[0]->addr);//转换输入值地址的数据类型 auto output_addr = reinterpret_cast(outputs[0]->addr);//转换输出值地址的数据类型 int dims = input_shape_.size(); std::vector reduce_shape = input_shape_; size_t reduce_size = 1;//初始化变量 reduce_shape[axis_] = 1;//初始化变量 for (int i = 0; i dims; ++i) { reduce_size *= reduce_shape[i]; } auto denominator_addr = std::make_unique(reduce_size);//获取分母地址 L2NormalizeCPUKernel::CalcDenominator(input_addr, reduce_size, dims, &denominator_addr); size_t output_size = outputs[0]->size / sizeof(T);//获取输出值大小 L2NormalizeCPUKernel::CalcOutput(input_addr, reduce_shape, output_size, output_addr, denominator_addr); return true; } template void L2NormalizeCPUKernel::CheckParam(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);//获取输入值 int dims = SizeToInt(input_shape_.size()); if (input_num != 1) {//判断输入值的数量是否为1，若不是则记录错误日志：输入值为...但是 L2NormalizeCPUKernel只需要一个输入值 MS_LOG(EXCEPTION) "Input number is " input_num ", but L2NormalizeCPUKernel needs 1 input."; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 1) {//判断输出值的数量是否为1，若不是则记录错误日志：输入值为...但是 L2NormalizeCPUKernel只需要一个输出值 MS_LOG(EXCEPTION) "Output number is " output_num ", but L2NormalizeCPUKernel needs 1 output."; } if (axis_ -dims || axis_ >= dims) { MS_LOG(EXCEPTION) "Attr axis_ " axis_ " must be in " -dims "~" dims; } if (epsilon_ == (T)0.0) { MS_LOG(EXCEPTION) "Attr epsilon can not be zero.";//epsilond不能为零 } } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于 2021-11-01 19:35:14 最后回复 一鲸落 2021-11-01 19:35:14
330 0

数据结构 kernel
[活动体验] cpu\isfinite_cpu_kernel.cc代码标注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\isfinite_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/isfinite_cpu_kernel.h" //导入系统自带的库 #include //导入自定义的库 #include "abstract/utils.h" #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 void IsFiniteCPUKernel::InitKernel(const CNodePtr &kernelNode) {//初始化内核 MS_EXCEPTION_IF_NULL(kernelNode);//判断内核节点是否为空 size_t input_num = AnfAlgo::GetInputTensorNum(kernelNode);//获取输入值 if (input_num != 1) {//判断输入值数量是否为1，若不是则记录错误日志：输入值为...但是IsFiniteCPUKernel需要一个输入 MS_LOG(EXCEPTION) "Input number is " input_num ", but IsFiniteCPUKernel needs 1 inputs."; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernelNode); if (output_num != 1) {//判断输出值数量是否为1，若不是则记录错误日志：输入值为...但是IsFiniteCPUKernel需要一个输出 MS_LOG(EXCEPTION) "Output number is " output_num ", but IsFiniteCPUKernel needs 1 output."; } input_dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernelNode, 0); if (dtype_map_.find(input_dtype_) == dtype_map_.end()) { MS_LOG(EXCEPTION) "Unsupported input type found.";//发现不支持的数据类型 } } //核对数据类型，根据不同数据类型执行相应操作，若是不存在的数据类型，则记录错误日志 bool IsFiniteCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { if (input_dtype_ == kNumberTypeFloat16) { LaunchKernelFloat16(inputs, outputs); } else if (input_dtype_ == kNumberTypeFloat32 || 
input_dtype_ == kNumberTypeFloat) { LaunchKernelFloat(inputs, outputs); } else if (input_dtype_ == kNumberTypeFloat64) { LaunchKernelFloat(inputs, outputs); } else if (dtype_map_.find(input_dtype_) != dtype_map_.end()) { LaunchKernelOther(inputs, outputs); } else {//只支持 bool, int, uint, float数据类型但是实际的数据类型是... MS_LOG(EXCEPTION) "Only support bool, int, uint, float, but actual data type is " TypeIdLabel(input_dtype_); } return true; } //核对Float16类型的内核 void IsFiniteCPUKernel::LaunchKernelFloat16(const std::vector &inputs, const std::vector &outputs) { float16 *input = reinterpret_cast(inputs[0]->addr)//转换输入的数据类型为float16 bool *output = reinterpret_cast(outputs[0]->addr);//转换输出的地址数据类型 size_t elem_num = inputs[0]->size / sizeof(float16);//计算输入值所占几个float16大小 for (size_t i = 0; i elem_num; i++) {//循环遍历数组 float temp_num = static_cast(input[i]); output[i] = !std::isinf(temp_num) && !std::isnan(temp_num);//输出值 } } //核对Float类型的内核 template void IsFiniteCPUKernel::LaunchKernelFloat(const std::vector &inputs, const std::vector &outputs) { T *input = reinterpret_cast(inputs[0]->addr);//转换输入的数据类型为T bool *output = reinterpret_cast(outputs[0]->addr);//转换输出的地址数据类型 size_t elem_num = inputs[0]->size / sizeof(T); for (size_t i = 0; i elem_num; i++) { output[i] = !std::isinf(input[i]) && !std::isnan(input[i]); } } //核对其他类型的内核 void IsFiniteCPUKernel::LaunchKernelOther(const std::vector &inputs, const std::vector &outputs) { bool *output = reinterpret_cast(outputs[0]->addr);//转换输出值地址的数据类型 auto type_iter = dtype_map_.find(input_dtype_);//q size_t elem_num = inputs[0]->size / (type_iter->second); for (size_t i = 0; i elem_num; i++) { output[i] = true; } } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于 2021-11-01 18:46:22 最后回复 一鲸落 2021-11-01 18:46:22
320 0

数据结构 kernel
[活动体验] cpu\iou_cpu_kernel.cc代码标注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\iou_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的库 #include "backend/kernel_compiler/cpu/iou_cpu_kernel.h" //导入系统自带得库 #include #include #include //导入自定义的库 #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" #include "runtime/device/cpu/cpu_device_address.h" #include "utils/ms_utils.h" namespace mindspore {//声明一个空间 namespace kernel {//空间嵌套 template void IOUCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node);//判断节点是否为空 auto anchor_boxes_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);//获取盒子形状 if (anchor_boxes_shape.size() != 2 || anchor_boxes_shape[1] != 4) { MS_LOG(EXCEPTION) "The anchor_boxes shape should be [N, 4].";//锚盒形状应该是[N, 4] } anchor_boxes_size_ = anchor_boxes_shape[0]; auto gt_boxes_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1); if (gt_boxes_shape.size() != 2 || gt_boxes_shape[1] != 4) { MS_LOG(EXCEPTION) "The gt_boxes shape should be [N, 4].";/gt_boxes形状应该是[N, 4] } gt_boxes_size_ = gt_boxes_shape[0]; iou_size_ = anchor_boxes_size_ * gt_boxes_size_; std::string iou_mode = AnfAlgo::GetNodeAttr(kernel_node, "mode"); if (iou_mode != "iou" && iou_mode != "iof") {//判断模型类型是否是iou类型或者iof类型 MS_LOG(EXCEPTION) "IOU mode should be 'iou', 'iof'."; } if (iou_mode == "iof") { mode_ = 1; } } //检查模板 template bool IOUCPUKernel::Launch(const std::vector &inputs, const std::vector & 
/*workspace*/, const std::vector &outputs) { if (inputs.size() != 2) {//判断输入值是否为2 MS_LOG(EXCEPTION) "Input number is " inputs.size() ", but IOU needs 2 inputs."; } if (outputs.size() != 1) {//判断输入值是否为1 MS_LOG(EXCEPTION) "Output number is " outputs.size() ", but IOU needs 1 outputs."; } auto anchor_boxes = reinterpret_cast(inputs[0]->addr);//转换anchor盒子地址类型 auto gt_boxes = reinterpret_cast(inputs[1]->addr);//转换gt_盒子地址类型 auto iou_score = reinterpret_cast(outputs[0]->addr);//转换iou分数地址类型 //多线程 auto task = [&](size_t start, size_t end) { for (size_t i = start; i end; i++) { int idx1 = i % anchor_boxes_size_ * 4; int idx2 = i / anchor_boxes_size_ * 4; T I_x0 = std::max(anchor_boxes[idx1], gt_boxes[idx2]); T I_y0 = std::max(anchor_boxes[idx1 + 1], gt_boxes[idx2 + 1]); T I_x1 = std::min(anchor_boxes[idx1 + 2], gt_boxes[idx2 + 2]); T I_y1 = std::min(anchor_boxes[idx1 + 3], gt_boxes[idx2 + 3]); T overlaps = std::max(T(0), (I_x1 - I_x0 + T(1)) * (I_y1 - I_y0 + T(1))); T area1 = (anchor_boxes[idx1 + 2] - anchor_boxes[idx1] + T(1)) * (anchor_boxes[idx1 + 3] - anchor_boxes[idx1 + 1] + T(1)); T area2 = (gt_boxes[idx2 + 2] - gt_boxes[idx2] + T(1)) * (gt_boxes[idx2 + 3] - gt_boxes[idx2 + 1] + T(1)); if (mode_ == 0) { iou_score[i] = overlaps / (area1 + area2 - overlaps + T(1e-10)); } else { iou_score[i] = overlaps / (area2 + T(1e-10)); } } }; CPUKernelUtils::ParallelFor(task, iou_size_); return true; } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于 2021-11-01 18:45:51 最后回复 一鲸落 2021-11-01 18:45:51
224 0

kernel
[活动体验] cpu\hsigmoid_cpu_kernel.cc代码标注

# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\hsigmoid_cpu_kernel.cc代码标注 ```c++ /** * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //导入自定义的包 #include "backend/kernel_compiler/cpu/hsigmoid_cpu_kernel.h" #include "runtime/device/cpu/cpu_device_address.h" //导入系统自导的包 #include namespace mindspore {//声明一个变量 namespace kernel {//空间嵌套 template void HSigmoidCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node);//检查节点参数 x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);//获取行的大小 for (const uint64_t &d : x_shape_) {//计算得变量大小 tensor_size_ *= d; } } template bool HSigmoidCPUKernel::Launch(const std::vector &inputs, const std::vector & /*workspace*/, const std::vector &outputs) { auto x = reinterpret_cast(inputs[0]->addr);//转换输出值类型 auto y = reinterpret_cast(outputs[0]->addr);//转换输入值类型 auto task = [&](size_t start, size_t end) {//任务大小 for (uint64_t i = start; i end; ++i) { if (x[i] = -3) { y[i] = 0; } else if (x[i] >= 3) { y[i] = 1; } else { y[i] = (x[i] + 3) / 6; } } }; CPUKernelUtils::ParallelFor(task, tensor_size_); return true; } template void HSigmoidCPUKernel::CheckParam(const CNodePtr &kernel_node) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);//获取输出值大小 if (input_num != 1) {//如果输入值不等于1.记录错误日志：输r值是... 
但是 HSigmoid得cpu内核需要一个 MS_LOG(EXCEPTION) "Input number is " input_num ", but HSigmoidCPUKernel needs 1 input."; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 1) {//如果输出值不等于1.记录错误日志：输出值是... 但是 HSigmoid得cpu内核需要一个 MS_LOG(EXCEPTION) "Output number is " output_num ", but HSigmoidCPUKernel needs 1 output."; } } } // namespace kernel } // namespace mindspore ```

一鲸落 发表于 2021-11-01 18:44:59 最后回复 一鲸落 2021-11-01 18:44:59
341 0

kernel

上滑加载中

推荐直播

热门标签

Java Python 数据结构 数据库 Linux 机器学习 网络 任务调度 MySQL JavaScript