# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\cpu_kernel.cc code annotation 1

```c++
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Project header
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
// Standard library headers (restored here to match the std:: facilities used below)
#include <algorithm>
#include <cmath>
#include <functional>
#include <numeric>
// Project header
#include "common/thread_pool.h"

namespace mindspore {  // outer namespace
namespace kernel {     // nested namespace
void CPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);  // throw if the kernel node is null
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);  // number of input tensors
  for (size_t input_index = 0; input_index < input_num; ++input_index) {  // iterate over the inputs
    TypeId type_id = AnfAlgo::GetInputDeviceDataType(kernel_node, input_index);  // element type id
    size_t type_size = GetTypeByte(TypeIdToType(type_id));                       // element size in bytes
    std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, input_index);
    size_t tensor_size =
      shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
    tensor_size = std::max(tensor_size, type_size);  // tensor size in bytes
    input_size_list_.emplace_back(tensor_size);      // construct the entry in place
  }
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);  // number of output tensors
  for (size_t output_index = 0; output_index < output_num; ++output_index) {  // iterate over the outputs
    TypeId type_id = AnfAlgo::GetOutputDeviceDataType(kernel_node, output_index);  // element type id
    size_t type_size = GetTypeByte(TypeIdToType(type_id));                         // element size in bytes
    std::vector<size_t> shape = AnfAlgo::GetOutputDeviceShape(kernel_node, output_index);
    size_t tensor_size =
      shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
    tensor_size = std::max(tensor_size, type_size);  // tensor size in bytes
    output_size_list_.emplace_back(tensor_size);     // construct the entry in place
  }
}

void CPUKernel::Init(const CNodePtr &kernel_node) {
  InitKernel(kernel_node);  // kernel-specific initialization
  InitInputOutputSize(kernel_node);
}

void CPUKernelUtils::ExpandDimsTo4(std::vector<size_t> *shape) {
  auto len = shape->size();  // current rank
  if (len < 4) {             // if the rank is below 4,
    for (size_t i = 0; i < 4 - len; ++i) {  // pad leading 1s until it reaches 4
      shape->insert(shape->begin(), 1);
    }
  }
}

size_t CPUKernelUtils::CalcOffset(const std::vector<size_t> &shape, size_t dim0, size_t dim1, size_t dim2,
                                  size_t dim3) {
  // flat offset of element (dim0, dim1, dim2, dim3) in a 4-D tensor
  size_t offset = dim0 * shape[1] * shape[2] * shape[3] + dim1 * shape[2] * shape[3] + dim2 * shape[3] + dim3;
  return offset;  // return the offset
}

size_t CPUKernelUtils::GetElementNumOnAxis(const std::vector<size_t> &shape, int axis) {
  if (axis < 0) {
    axis = axis + SizeToInt(shape.size());  // normalize a negative axis
  }
  size_t result = 1;
  for (int j = 3; j > axis; --j) {  // product of the dims after axis
    result *= shape[j];
  }
  return result;
}

void CPUKernelUtils::GetElementNumEveryDim(const std::vector<size_t> &shape, std::vector<size_t> *element_num) {
  size_t accumulation = 1;       // running product
  element_num->emplace_back(1);  // the last dim has stride 1
  for (size_t i = shape.size() - 1; i > 0; --i) {
    accumulation *= shape[i];  // accumulate the element count
    element_num->emplace_back(accumulation);
  }
  std::reverse(element_num->begin(), element_num->end());
}

void CPUKernelUtils::ParallelFor(const CTask &task, size_t count) {
  auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
  const float block_size = 128.0;  // minimum amount of work per thread
  size_t thread_num = count < block_size * max_thread_num ? std::ceil(count / block_size) : max_thread_num;
  std::vector<common::Task> tasks;  // one task per chunk
  size_t start = 0;
  size_t once_compute_size = (count + thread_num - 1) / thread_num;  // chunk size
  while (start < count) {
    size_t end = (start + once_compute_size) > count ? count : (start + once_compute_size);
    auto block = [&, start, end]() {
      task(start, end);
      return common::SUCCESS;  // report success
    };
    tasks.emplace_back(block);  // construct the task in place
    start += once_compute_size;
  }
  common::ThreadPool::GetInstance().SyncRun(tasks);
}

std::vector<size_t> CPUKernelUtils::FlatShapeByAxis(const std::vector<size_t> &shape, int axis) {
  if (axis < 0) {
    axis = axis + SizeToInt(shape.size());  // normalize a negative axis
  }
  size_t dim_row = 1;
  size_t dim_col = 1;
  std::vector<size_t> flat_shape;
  for (size_t i = 0; i < shape.size(); ++i) {
    if (SizeToInt(i) < axis) {
      dim_row *= shape[i];  // dims before axis collapse into rows
    } else {
      dim_col *= shape[i];  // dims from axis onward collapse into columns
    }
  }
  flat_shape.push_back(dim_row);
  flat_shape.push_back(dim_col);
  return flat_shape;
}
}  // namespace kernel
}  // namespace mindspore
```
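To see what `FlatShapeByAxis` computes, here is a minimal, self-contained sketch (the standalone function below mirrors the member function annotated above, but is rewritten outside MindSpore purely for illustration):

```c++
#include <cstddef>
#include <iostream>
#include <vector>

// Standalone re-implementation of the FlatShapeByAxis logic for illustration:
// dims before `axis` collapse into rows, the rest into columns.
std::vector<size_t> FlatShapeByAxis(const std::vector<size_t> &shape, int axis) {
  if (axis < 0) {
    axis += static_cast<int>(shape.size());  // normalize a negative axis
  }
  size_t dim_row = 1;
  size_t dim_col = 1;
  for (size_t i = 0; i < shape.size(); ++i) {
    if (static_cast<int>(i) < axis) {
      dim_row *= shape[i];
    } else {
      dim_col *= shape[i];
    }
  }
  return {dim_row, dim_col};
}

int main() {
  // A (2, 3, 4) tensor: axis 1 gives (2, 12), axis 2 gives (6, 4), axis -1 behaves like axis 2.
  for (int axis : {1, 2, -1}) {
    auto flat = FlatShapeByAxis({2, 3, 4}, axis);
    std::cout << "axis " << axis << " -> (" << flat[0] << ", " << flat[1] << ")\n";
  }
  return 0;
}
```

Everything before the axis becomes the row count, everything else the column count; Concat and similar kernels rely on this 2-D view to reduce N-D copies to simple row-wise copies.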
# mindspore\mindspore\ccsrc\backend\kernel_compiler\host\host_kernel_metadata.h/.cc code annotation

```C++
// The #ifndef guard prevents double inclusion by defining
// MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_ as a macro
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_
// Standard library headers (restored to match the declaration below)
#include <memory>
#include <string>
#include <vector>
// Project header
#include "backend/kernel_compiler/kernel_build_info.h"
// Nested namespaces
namespace mindspore {
namespace kernel {
// The header declares a single function
void HostMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list);
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_HOST_HOST_KERNEL_META_DATA_H_
```

host_kernel_metadata.cc

```C++
// The header matching this source file
#include "backend/kernel_compiler/host/host_kernel_metadata.h"
// Standard library headers
#include <memory>
#include <string>
// Project headers
#include "backend/kernel_compiler/oplib/oplib.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/session/anf_runtime_algorithm.h"
// Nested namespaces
namespace mindspore {
namespace kernel {
/*
  constexpr may be used on functions with or without parameters.
  A constexpr function can appear in constant expressions; the compiler only accepts one when:
  1. The body is simple enough: apart from typedefs and static assertions it may contain only a
     return statement (a constexpr constructor may have only an initializer list). Since C++14,
     ordinary statements are also allowed inside a constexpr function body.
  2. The parameters and the return type must be literal types. Note that the return value of a
     constexpr function is not necessarily a compile-time constant.
*/
constexpr auto kDynamicShape = "DynamicShape";
// Definition of the single function declared in the header
void HostMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
  MS_LOG(INFO) << "HostMetadataInfo.";  // log that host metadata is being collected
  MS_EXCEPTION_IF_NULL(kernel_node);
  MS_EXCEPTION_IF_NULL(kernel_info_list);
  std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
  // only the DynamicShape op runs on the host
  if (op_name != kDynamicShape) {
    MS_LOG(DEBUG) << "Host does not have op [" << op_name << "]";
    return;
  }
  // two lists describing the inputs
  std::vector<std::string> inputs_format{};
  std::vector<TypeId> inputs_type{};
  // size_t (an unsigned integer type) can improve portability, correctness and readability
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  for (size_t input_index = 0; input_index < input_num; ++input_index) {  // iterate over kernel_node's inputs
    inputs_format.emplace_back(kOpFormat_DEFAULT);  // append the default format
    // append the inferred data type of the input at input_index
    inputs_type.push_back(AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index));
  }
  // two lists describing the outputs, built exactly like the input lists
  std::vector<std::string> outputs_format;
  std::vector<TypeId> outputs_type;
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  for (size_t output_index = 0; output_index < output_num; ++output_index) {
    outputs_format.emplace_back(kOpFormat_DEFAULT);
    outputs_type.push_back(AnfAlgo::GetOutputInferDataType(kernel_node, output_index));
  }
  // create a builder and pass in the formats and types collected above, category by category
  auto builder = KernelBuildInfo::KernelBuildInfoBuilder();
  builder.SetInputsFormat(inputs_format);
  builder.SetInputsDeviceType(inputs_type);
  builder.SetOutputsFormat(outputs_format);
  builder.SetOutputsDeviceType(outputs_type);
  builder.SetKernelType(HOST_KERNEL);
  // append the result of builder.Build() to the list
  kernel_info_list->push_back(builder.Build());
}
}  // namespace kernel
}  // namespace mindspore
```
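The `KernelBuildInfoBuilder` calls at the end follow the classic builder pattern: accumulate configuration, then produce the finished object with `Build()`. Below is a toy, self-contained sketch of that pattern; `BuildInfo` and `BuildInfoBuilder` are hypothetical stand-ins for illustration, not MindSpore types:

```C++
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Toy stand-ins whose method names mirror the MindSpore builder above.
struct BuildInfo {
  std::vector<std::string> inputs_format;
  std::vector<std::string> outputs_format;
};

class BuildInfoBuilder {
 public:
  BuildInfoBuilder &SetInputsFormat(std::vector<std::string> f) {
    info_.inputs_format = std::move(f);
    return *this;
  }
  BuildInfoBuilder &SetOutputsFormat(std::vector<std::string> f) {
    info_.outputs_format = std::move(f);
    return *this;
  }
  std::shared_ptr<BuildInfo> Build() { return std::make_shared<BuildInfo>(info_); }

 private:
  BuildInfo info_;  // accumulated configuration
};

int main() {
  // Same flow as HostMetadataInfo: collect per-input/per-output metadata, then Build().
  auto info = BuildInfoBuilder()
                .SetInputsFormat({"DefaultFormat", "DefaultFormat"})
                .SetOutputsFormat({"DefaultFormat"})
                .Build();
  std::cout << info->inputs_format.size() << " inputs, " << info->outputs_format.size() << " output\n";
  return 0;
}
```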
Continuing from [TrustieForge](https://forum.trustie.net/forums/4442/detail), we keep analyzing the main code in kernel_graph.cc.

###### 11 GetRefCorrespondOutput — get the ref pair matching an output

```cpp
AnfWithOutIndex KernelGraph::GetRefCorrespondOutput(const AnfWithOutIndex &out_pair) const {
  if (!IsInRefOutputMap(out_pair)) {
    MS_LOG(EXCEPTION) << "Out_pair is not in RefOutputMap";
  }
  return ref_out_in_map_.at(out_pair);
}
```

###### 12 AddRefCorrespondPairs — add a matching node pair

```cpp
void KernelGraph::AddRefCorrespondPairs(const AnfWithOutIndex &final_pair, const AnfWithOutIndex &origin_pair) {
  // look in the map first; if the pair already exists there is nothing to add
  if (IsInRefOutputMap(final_pair)) {
    MS_LOG(EXCEPTION) << "Out_pair is already in RefOutputMap";
  }
  (void)ref_out_in_map_.insert(std::make_pair(final_pair, origin_pair));
}
```

###### 13 std::map<AnfWithOutIndex, AnfWithOutIndex> GetRefMap()

Returns the ref_out_in_map_ mapping table.

###### 14 executable() — check whether the graph is executable; only executable graphs are compiled and run at the GE layer

```cpp
bool executable() const { return executable_; }
```

###### 15 set_executable() — mark the graph as executable

```cpp
void set_executable(bool executable) { executable_ = executable; }
```

###### 16 ReplaceNode(NotNull<AnfNodePtr> old_anf_node, NotNull<AnfNodePtr> new_anf_node) — replace a node in the graph

```cpp
/*
  old_anf_node: the old ANF node to be replaced
  new_anf_node: the new ANF node
*/
void KernelGraph::ReplaceNode(NotNull<AnfNodePtr> old_anf_node, NotNull<AnfNodePtr> new_anf_node) {
  MS_EXCEPTION_IF_NULL(inputs_);
  {
    std::queue<AnfNodePtr> seed_nodes;
    UpdateNodeEdgeList(&seed_nodes);
  }
  // create an iterator over the output edges of the old node
  auto it = node_output_edges_.find(old_anf_node);
  if (it != node_output_edges_.end()) {
    const auto &outputs = it->second;
    for (auto &output_node : outputs) {
      MS_EXCEPTION_IF_NULL(output_node.first);
      auto output_cnode = output_node.first->cast<CNodePtr>();
      MS_EXCEPTION_IF_NULL(output_cnode);
      auto &output_node_inputs = output_cnode->inputs();
      // a control edge (output_node.second == 0) is not replaced
      if (output_node.second == 0) {
        continue;
      }
      for (size_t i = 1; i < output_node_inputs.size(); i++) {
        if (output_node_inputs[i] == old_anf_node.get()) {
          output_cnode->set_input(i, new_anf_node);
        }
      }
    }
    // update front to backend map
    FrontBackendlMapUpdate(old_anf_node, new_anf_node);
  }
  {
    std::queue<AnfNodePtr> seed_nodes;
    UpdateNodeEdgeList(&seed_nodes);
  }
}
```

###### 17 GetLeafGraphOrder() — compute the leaf-graph order of the root graph

```cpp
std::vector<std::shared_ptr<KernelGraph>> KernelGraph::GetLeafGraphOrder() {
  // initialize the leaf-graph order
  std::vector<std::shared_ptr<KernelGraph>> leaf_graph_order;
  if (IsLeafGraph()) {
    leaf_graph_order.push_back(shared_from_this()->cast<KernelGraphPtr>());
  } else {
    for (const auto &child_graph : child_graph_order_) {
      std::shared_ptr<KernelGraph> child_graph_ptr = child_graph.lock();
      MS_EXCEPTION_IF_NULL(child_graph_ptr);
      auto child_leaf_graph_order = child_graph_ptr->GetLeafGraphOrder();
      std::copy(child_leaf_graph_order.begin(), child_leaf_graph_order.end(), std::back_inserter(leaf_graph_order));
    }
  }
  return leaf_graph_order;
}
```

###### 18 IsLeafGraph() — check whether the current graph is a leaf graph

```cpp
bool KernelGraph::IsLeafGraph() const {
  // a leaf graph has no child graphs
  return child_graph_order_.empty();
}
```

###### 19 FindNodeByPrimitive() — find ANF nodes in the current graph by primitive

```cpp
std::vector<CNodePtr> KernelGraph::FindNodeByPrimitive(const std::vector<PrimitivePtr> &primitive_list) const {
  std::vector<CNodePtr> result;
  for (const auto &anf : execution_order_) {
    for (const auto &primitive : primitive_list) {
      if (AnfAlgo::CheckPrimitiveType(anf, primitive) && AnfAlgo::GetGraphId(anf.get()) == graph_id_) {
        result.push_back(anf->cast<CNodePtr>());
      }
    }
  }
  return result;
}
```

###### 20 ToString() — the graph's name as a string

```cpp
std::string KernelGraph::ToString() const {
  // represent the graph as "kernel_graph_" plus its id
  return std::string("kernel_graph_").append(std::to_string(graph_id_));
}
```
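`ref_out_in_map_`, used by items 11-13, is keyed on a (node, output index) pair. Here is a self-contained sketch of that pattern with plain `std::map`; the `AnfWithOutIndex` alias below is a string-based stand-in for illustration, not the real node type:

```cpp
#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <utility>

// Stand-in for AnfWithOutIndex: a (node-id, output-index) pair.
using AnfWithOutIndex = std::pair<std::string, size_t>;

int main() {
  std::map<AnfWithOutIndex, AnfWithOutIndex> ref_out_in_map;
  AnfWithOutIndex final_pair{"kernel_a", 0}, origin_pair{"param_x", 0};

  // AddRefCorrespondPairs: refuse duplicates, then insert.
  if (ref_out_in_map.count(final_pair) != 0) {
    std::cerr << "Out_pair is already in RefOutputMap\n";
    return 1;
  }
  ref_out_in_map.insert(std::make_pair(final_pair, origin_pair));

  // IsInRefOutputMap / GetRefCorrespondOutput: membership test, then lookup.
  if (ref_out_in_map.count(final_pair) != 0) {
    std::cout << final_pair.first << ":" << final_pair.second << " -> "
              << ref_out_in_map.at(final_pair).first << "\n";
  }
  return 0;
}
```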
From earlier study we already know that ANF is the representation of computation graphs in MindSpore. It exists on both the front end and the back end: it is optimized in the ME-layer pipeline, and later compiled and run at the GE layer.

front_anf and backend_anf appear throughout this post. front_anf refers to the graph optimized at the ME layer, while backend_anf is the ANF handed to the back end; it exists in operator form, but since the two correspond one-to-one, we still call it the backend ANF.

The code below makes heavy use of map.find(), so we explain it first.

> map::find() is a built-in function of the C++ STL. It returns an iterator (or const iterator) referring to the position of the key in the map. If the key is not present in the map container, it returns an iterator (or const iterator) referring to map.end().
>
> **Usage:**
>
> ```
> iterator map_name.find(key)
> or
> constant iterator map_name.find(key)
> ```
>
> **Parameters:** the function takes one mandatory parameter, the key to search for in the map container.
>
> **Return value:** an iterator (or const iterator) referring to the position of the key in the map, or to map.end() if the key is absent from the map container.

###### 1 FrontBackendlMapAdd — add a new front/backend ANF correspondence to the lookup tables

```cpp
/*
  front_anf: the front-end ANF, built at the ME front end
  backend_anf: the back-end ANF, built at the ME back end
*/
void KernelGraph::FrontBackendlMapAdd(const AnfNodePtr &front_anf, const AnfNodePtr &backend_anf) {
  MS_EXCEPTION_IF_NULL(front_anf);
  MS_EXCEPTION_IF_NULL(backend_anf);
  // if front_anf is already in the front->backend map, raise an exception
  if (front_backend_anf_map_.find(front_anf) != front_backend_anf_map_.end()) {
    MS_LOG(EXCEPTION) << "Anf " << front_anf->DebugString() << " has been exist in the front_backend_anf_map_";
  }
  // if backend_anf is already in the backend->front map, raise an exception
  if (backend_front_anf_map_.find(backend_anf) != backend_front_anf_map_.end()) {
    auto front_node = front_anf->cast<CNodePtr>();
    MS_EXCEPTION_IF_NULL(front_node);
    auto attr_input = front_node->input(kAnfPrimitiveIndex);
    if (!attr_input->isa<CNode>()) {
      MS_LOG(EXCEPTION) << "Kernel " << backend_anf->DebugString() << "has been exist in the backend_front_anf_map_";
    }
  }
  // add the new front/backend correspondence to both maps
  front_backend_anf_map_[front_anf] = backend_anf;
  backend_front_anf_map_[backend_anf] = front_anf;
}
```

###### 2 FrontBackendlMapUpdate — update the backend ANF

```cpp
/*
  old_backend_anf: the old backend ANF to be replaced
  new_backend_anf: the new backend ANF that replaces it
*/
void KernelGraph::FrontBackendlMapUpdate(const AnfNodePtr &old_backend_anf, const AnfNodePtr &new_backend_anf) {
  MS_EXCEPTION_IF_NULL(old_backend_anf);
  MS_EXCEPTION_IF_NULL(new_backend_anf);
  // nothing to do when the new ANF is the same as the old one
  if (old_backend_anf == new_backend_anf) {
    MS_LOG(DEBUG) << "Old same with new:" << old_backend_anf->DebugString();
    return;
  }
  // look up the old ANF in the backend->front table; if absent, the given old ANF is wrong
  if (backend_front_anf_map_.find(old_backend_anf) == backend_front_anf_map_.end()) {
    MS_LOG(DEBUG) << "Old_backend_anf " << old_backend_anf->DebugString() << " is not exist in the map";
    return;
  }
  // look up the matching entry in the front->backend table; if absent, the tables are inconsistent
  if (front_backend_anf_map_.find(backend_front_anf_map_[old_backend_anf]) == front_backend_anf_map_.end()) {
    MS_LOG(EXCEPTION) << "Anf is not exist in the map ,old " << old_backend_anf->DebugString();
  }
  // if the old node is an internal output, replace it there as well
  if (IsInternalOutput(old_backend_anf)) {
    ReplaceInternalOutput(old_backend_anf, new_backend_anf);
  }
  // update both lookup tables
  front_backend_anf_map_[backend_front_anf_map_[old_backend_anf]] = new_backend_anf;
  backend_front_anf_map_[new_backend_anf] = backend_front_anf_map_[old_backend_anf];
  // erase the old entry
  (void)backend_front_anf_map_.erase(old_backend_anf);
}
```

###### 3 AnfNodePtr GetBackendAnfByFrontAnf — find the backend ANF through the front->backend table

```cpp
/*
  param front_anf: the front-end ANF used as the lookup key
  return: front_backend_anf_map_[front_anf], the matching backend ANF found in the map
*/
AnfNodePtr KernelGraph::GetBackendAnfByFrontAnf(const AnfNodePtr &front_anf) {
  // == front_backend_anf_map_.end() means the key is not in the table, so return nullptr
  if (front_backend_anf_map_.find(front_anf) == front_backend_anf_map_.end()) {
    return nullptr;
  }
  // otherwise return the matching backend ANF
  return front_backend_anf_map_[front_anf];
}
```

###### 4 GetFrontAnfByBackendAnf — find the front-end ANF through the backend->front table

```cpp
AnfNodePtr KernelGraph::GetFrontAnfByBackendAnf(const AnfNodePtr &backend_anf) {
  // == backend_front_anf_map_.end() means the key is not in the table, so return nullptr
  if (backend_front_anf_map_.find(backend_anf) == backend_front_anf_map_.end()) {
    return nullptr;
  }
  // otherwise return the matching front-end ANF
  return backend_front_anf_map_[backend_anf];
}
```

###### 5 BackendNodeExistInFrontBackendMap — check whether a backend ANF node exists in the lookup table

```cpp
/*
  return: true if backend_anf is in the table, false otherwise
*/
bool KernelGraph::BackendNodeExistInFrontBackendMap(const AnfNodePtr &backend_anf) {
  return backend_front_anf_map_.find(backend_anf) != backend_front_anf_map_.end();
}
```

###### 6 GetValueNodeByTensor — get the value node of a tensor

```cpp
/*
  tensor: the tensor
  tensor_to_value_node_map_: the map from tensors to their value nodes
*/
ValueNodePtr KernelGraph::GetValueNodeByTensor(const mindspore::tensor::TensorPtr &tensor) {
  if (tensor_to_value_node_map_.find(tensor) == tensor_to_value_node_map_.end()) {
    return nullptr;
  }
  return tensor_to_value_node_map_[tensor];
}
```

###### 7 TensorValueNodeMapAdd — add a new tensor->value-node relation to the map

```cpp
/*
  tensor: the tensor
  value_node: the value node matching the tensor
  tensor_to_value_node_map_: the map from tensors to their value nodes
*/
void KernelGraph::TensorValueNodeMapAdd(const tensor::TensorPtr &tensor, const ValueNodePtr &value_node) {
  // check tensor and value_node for null
  MS_EXCEPTION_IF_NULL(tensor);
  MS_EXCEPTION_IF_NULL(value_node);
  tensor_to_value_node_map_[tensor] = value_node;
}
```

###### 8 unordered_set<ValueNodePtr> graph_value_nodes — get all value nodes of the ANF graph

###### 9 AddValueNodeToGraph — add a value node to the graph

```cpp
void KernelGraph::AddValueNodeToGraph(const ValueNodePtr &value_node) {
  (void)graph_value_nodes_.insert(value_node);
}
```

###### 10 IsInRefOutputMap — check whether an output pair exists in the ref_out_in_map_ map

```cpp
bool KernelGraph::IsInRefOutputMap(const AnfWithOutIndex &pair) const { return ref_out_in_map_.count(pair) != 0; }
```
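As a quick runnable illustration of the `map.find()` semantics described above (with plain strings standing in for ANF node pointers):

```cpp
#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<std::string, std::string> front_backend_anf_map{{"front_a", "backend_a"}};

  // find() returns an iterator to the element, or end() when the key is absent —
  // exactly the check GetBackendAnfByFrontAnf performs before indexing the map.
  auto it = front_backend_anf_map.find("front_a");
  if (it != front_backend_anf_map.end()) {
    std::cout << "found: " << it->second << "\n";
  }
  if (front_backend_anf_map.find("front_b") == front_backend_anf_map.end()) {
    std::cout << "front_b absent; the kernel-graph code returns nullptr here\n";
  }
  return 0;
}
```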
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\concat_cpu_kernel.cc code annotation

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Project headers
#include "backend/kernel_compiler/cpu/concat_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {  // outer namespace
namespace kernel {     // nested namespace
template <typename T>
void ConcatCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  node_wpt_ = kernel_node;  // cache a weak pointer to the node
  CheckParam(kernel_node);  // validate the parameters
  axis_ = LongToInt(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS));  // read the concat axis attribute
  auto input_1_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);  // shape of the first input
  if (axis_ < 0) {
    axis_ = axis_ + SizeToInt(input_1_shape.size());  // normalize a negative axis
  }
}

template <typename T>
bool ConcatCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &,
                                const std::vector<kernel::AddressPtr> &outputs) {
  auto node_ = node_wpt_.lock();
  if (!node_) {
    MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
  }
  size_t input_num = AnfAlgo::GetInputTensorNum(node_);
  std::vector<std::vector<size_t>> input_flat_shape_list;
  for (size_t i = 0; i < input_num; i++) {  // iterate over the inputs
    auto input_shape_i = AnfAlgo::GetPrevNodeOutputInferShape(node_, i);       // shape of input i
    auto flat_shape = CPUKernelUtils::FlatShapeByAxis(input_shape_i, axis_);   // flatten it to 2-D at the axis
    input_flat_shape_list.push_back(flat_shape);
  }
  auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);  // output buffer
  auto buff_size = outputs[0]->size;                           // remaining bytes in the output buffer
  // after flattening, every input has the same number of rows (dim 0)
  auto before_axis = input_flat_shape_list[0][0];  // row count before the axis
  for (size_t i = 0; i < before_axis; ++i) {
    for (size_t j = 0; j < input_num; ++j) {
      if (input_flat_shape_list[j][1] == 0) {
        continue;  // skip empty inputs
      }
      auto input_j_addr = reinterpret_cast<T *>(inputs[j]->addr);
      auto copy_num = input_flat_shape_list[j][1];  // elements per row of input j
      auto offset = copy_num * i;                   // start of row i within input j
      auto ret = memcpy_s(output_addr, buff_size, input_j_addr + offset, copy_num * sizeof(T));
      if (ret != EOK) {
        MS_LOG(EXCEPTION) << "memcpy failed.";  // the copy failed
      }
      output_addr += copy_num;
      buff_size -= copy_num * sizeof(T);
    }
  }
  return true;
}

template <typename T>
void ConcatCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) const {
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != 1) {  // log an error if the output count is not 1
    MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ConcatCPUKernel needs 1 output.";
  }
}
}  // namespace kernel
}  // namespace mindspore
```
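The copy strategy is easier to see on concrete numbers. Below is a self-contained sketch, assuming two inputs already flattened to (rows, cols_j) exactly as `FlatShapeByAxis` would produce:

```c++
#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative sketch of the Concat strategy above: flatten every input to
// (rows, cols_j) at the concat axis, then copy row by row, input by input.
int main() {
  // Two inputs with flat shapes (2, 2) and (2, 3); the result has flat shape (2, 5).
  std::vector<std::vector<int>> inputs = {{1, 2, 3, 4}, {5, 6, 7, 8, 9, 10}};
  std::vector<size_t> cols = {2, 3};
  size_t rows = 2;

  std::vector<int> output;
  for (size_t i = 0; i < rows; ++i) {             // outer loop: rows (before_axis)
    for (size_t j = 0; j < inputs.size(); ++j) {  // inner loop: inputs
      size_t offset = cols[j] * i;                // same offset arithmetic as the kernel
      for (size_t k = 0; k < cols[j]; ++k) {
        output.push_back(inputs[j][offset + k]);
      }
    }
  }
  for (int v : output) std::cout << v << ' ';  // prints: 1 2 5 6 7 3 4 8 9 10
  std::cout << '\n';
  return 0;
}
```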
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\broadcast_to_cpu_kernel.cc code annotation

```c++
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Project headers
#include "backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h"
#include "nnacl/errorcode.h"

namespace mindspore {  // outer namespace
namespace kernel {     // nested namespace
template <typename T>
void BroadcastToCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
  size_t input_shape_size = input_shape_.size();
  size_t output_shape_size = output_shape_.size();
  if (output_shape_size < input_shape_size) {  // the output rank must not be smaller than the input rank
    MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_
                      << " to a smaller dimension shape " << output_shape_ << ".";
  }
  if (output_shape_size > MAX_SHAPE_SIZE) {  // shapes beyond 8-D are not supported
    MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_ << " to a shape "
                      << output_shape_ << " more than 8-D.";
  }
  size_t offset = output_shape_size - input_shape_size;
  for (size_t i = 0; i < input_shape_size; ++i) {  // each input dim must match the output dim or be 1
    if (input_shape_[i] != output_shape_[i + offset] && input_shape_[i] != 1) {
      MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_ << " to a shape "
                        << output_shape_ << ".";
    }
  }
  for (size_t i = 0; i < input_shape_size; ++i) {  // copy the input shape into the shape info
    shape_info_.input_shape_[i] = SizeToInt(input_shape_[i]);
  }
  for (size_t i = 0; i < output_shape_size; ++i) {  // copy the output shape into the shape info
    shape_info_.output_shape_[i] = SizeToInt(output_shape_[i]);
  }
  shape_info_.input_shape_size_ = SizeToInt(input_shape_size);    // input rank
  shape_info_.output_shape_size_ = SizeToInt(output_shape_size);  // output rank
}

template <typename T>
bool BroadcastToCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
                                     const std::vector<AddressPtr> &outputs) {
  if (inputs.size() != 1 || outputs.size() != 1) {
    MS_LOG(EXCEPTION) << "Wrong number of inputs or outputs!";
  }
  if ((inputs[0] == nullptr) || (inputs[0]->size == 0)) {
    MS_LOG(EXCEPTION) << "Input data is NULL!";
  }
  if ((outputs[0] == nullptr) || (outputs[0]->size == 0)) {
    MS_LOG(EXCEPTION) << "Output data is NULL!";
  }
  const auto input_addr = reinterpret_cast<T *>(inputs[0]->addr);  // input buffer
  auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);      // output buffer
  int ret = NNACL_ERR;
  // dispatch on the element type
  if constexpr (std::is_same_v<T, bool>) {
    ret = BroadcastTo(bool, input_addr, &shape_info_, output_addr);
  } else if constexpr (std::is_same_v<T, int>) {
    ret = BroadcastTo(int, input_addr, &shape_info_, output_addr);
  } else if constexpr (std::is_same_v<T, float>) {
    ret = BroadcastTo(float, input_addr, &shape_info_, output_addr);
  } else {
    MS_LOG(EXCEPTION) << "Not supported data type for BroadcastTo.";  // no supported data type for the broadcast
  }
  if (ret == NNACL_OK) {
    return true;
  }
  // broadcasting the input to the output shape failed
  MS_LOG(ERROR) << "Broadcast tensor with shape " << input_shape_ << " to shape " << output_shape_
                << " execute failed.";
  return false;
}
}  // namespace kernel
}  // namespace mindspore
```
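The dimension check in `InitKernel` implements the usual right-aligned broadcasting rule. A minimal standalone sketch of just that rule (`CanBroadcast` is an illustrative helper, not part of the kernel):

```c++
#include <cstddef>
#include <iostream>
#include <vector>

// Right-align the input shape against the output shape;
// every input dim must equal the matching output dim or be 1.
bool CanBroadcast(const std::vector<size_t> &in, const std::vector<size_t> &out) {
  if (out.size() < in.size()) return false;  // never broadcast to fewer dims
  size_t offset = out.size() - in.size();
  for (size_t i = 0; i < in.size(); ++i) {
    if (in[i] != out[i + offset] && in[i] != 1) return false;
  }
  return true;
}

int main() {
  std::cout << CanBroadcast({3, 1}, {2, 3, 5}) << '\n';  // 1: (3,1) -> (2,3,5) is fine
  std::cout << CanBroadcast({3, 4}, {2, 3, 5}) << '\n';  // 0: 4 vs 5 mismatch
  return 0;
}
```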
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\binary_cross_entropy_cpu_kernel.cc code annotation

```c++
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Project header
#include "backend/kernel_compiler/cpu/binary_cross_entropy_cpu_kernel.h"

namespace mindspore {  // outer namespace
namespace kernel {     // nested namespace
template <typename T>
void BinaryCrossEntropyCpuKernel::LaunchToScalar(const int &input_size, const int &reduction, T *loss, T *tmp_loss) {
  // reduce the per-element losses to a scalar by pairwise summation
  if (input_size % 2 == 1) {
    tmp_loss[0] += tmp_loss[input_size - 1];  // fold the odd tail element in first
  }
  for (int stride = input_size / 2; stride > 0; stride >>= 1) {  // halve the stride each round
    for (int i = 0; i < stride; i++) {
      tmp_loss[i] += tmp_loss[i + stride];
    }
    if (stride > 2 && stride % 2 == 1) {
      tmp_loss[0] += tmp_loss[stride - 1];  // fold the odd tail of this round
    }
  }
  loss[0] += tmp_loss[0];
  if (reduction == 1) {  // reduction == 1 means 'mean'
    loss[0] /= static_cast<T>(input_size);
  }
}

template <typename T>
void BinaryCrossEntropyCpuKernel::Launchkernel(const std::vector<AddressPtr> &inputs,
                                               const std::vector<AddressPtr> &workspace,
                                               const std::vector<AddressPtr> &outputs) {
  T *input_x = reinterpret_cast<T *>(inputs[0]->addr);  // predictions x
  T *input_y = reinterpret_cast<T *>(inputs[1]->addr);  // labels y
  T *weight = reinterpret_cast<T *>(inputs[2]->addr);   // per-element weights
  T *loss = reinterpret_cast<T *>(outputs[0]->addr);    // loss output
  std::vector<T> tmp_loss(input_size_);
  T epsilon = static_cast<T>(1e-12);  // guards log() against zero
  T one = static_cast<T>(1);
  if (reduction_ == 0) {  // 'none': write the per-element loss directly to the output
    for (size_t i = 0; i < input_size_; i++) {
      T value =
        -weight[i] * (input_y[i] * log(input_x[i] + epsilon) + (one - input_y[i]) * log(one - input_x[i] + epsilon));
      loss[i] = value;
    }
  } else {  // 'mean'/'sum': stage the per-element losses in tmp_loss
    for (size_t i = 0; i < input_size_; i++) {
      T value =
        -weight[i] * (input_y[i] * log(input_x[i] + epsilon) + (one - input_y[i]) * log(one - input_x[i] + epsilon));
      tmp_loss[i] = value;
    }
  }
  if (reduction_ != 0) {
    LaunchToScalar(input_size_, reduction_, loss, tmp_loss.data());
  }
}

bool BinaryCrossEntropyCpuKernel::Launch(const std::vector<AddressPtr> &inputs,
                                         const std::vector<AddressPtr> &workspace,
                                         const std::vector<AddressPtr> &outputs) {
  if (input_size_ > 0) {  // dispatch on the element data type
    if (dtype_ == kNumberTypeFloat32) {
      Launchkernel<float>(inputs, workspace, outputs);
    } else if (dtype_ == kNumberTypeFloat16) {
      Launchkernel<float16>(inputs, workspace, outputs);
    }
  }
  return true;
}

void BinaryCrossEntropyCpuKernel::InitKernel(const CNodePtr &kernel_node) {
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);  // shape of the first input
  for (size_t i = 0; i < input_shape.size(); i++) {  // total element count
    input_size_ *= input_shape[i];
  }
  string reduction = AnfAlgo::GetNodeAttr<string>(kernel_node, "reduction");
  if (reduction == "none") {
    reduction_ = 0;
  } else if (reduction == "sum") {
    reduction_ = 2;
  }  // otherwise keep the default ('mean')
  dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);  // element data type
}
}  // namespace kernel
}  // namespace mindspore
```
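The per-element expression in `Launchkernel` is the weighted binary cross-entropy. Here is a self-contained numeric sketch of the same formula with mean reduction, using plain `double` instead of the kernel's template type:

```c++
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

// Per element: loss_i = -w_i * (y_i * log(x_i + eps) + (1 - y_i) * log(1 - x_i + eps)),
// then a mean reduction over all elements.
int main() {
  std::vector<double> x = {0.9, 0.2, 0.7};  // predictions
  std::vector<double> y = {1.0, 0.0, 1.0};  // labels
  std::vector<double> w = {1.0, 1.0, 1.0};  // per-element weights
  const double eps = 1e-12;                 // same epsilon as the kernel

  double loss = 0.0;
  for (size_t i = 0; i < x.size(); ++i) {
    loss += -w[i] * (y[i] * std::log(x[i] + eps) + (1.0 - y[i]) * std::log(1.0 - x[i] + eps));
  }
  std::cout << "mean BCE = " << loss / x.size() << '\n';
  return 0;
}
```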
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\bias_add_grad_cpu_kernel.cc code annotation

```c++
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Project header
#include "backend/kernel_compiler/cpu/bias_add_grad_cpu_kernel.h"

namespace mindspore {  // outer namespace
namespace kernel {     // nested namespace
void BiasAddGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);  // throw if the node is null
  input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);  // input shape
  if (input_shape_.size() < 2) {  // the input rank must be at least 2 for BiasAddGrad
    MS_LOG(EXCEPTION) << "Input tensor's rank must be at least 2 for 'BiasAddGrad' Op, but input tensor's rank is "
                      << input_shape_.size();
  }
}

bool BiasAddGradCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
                                  const std::vector<AddressPtr> &outputs) {
  if (inputs.size() != 1 || outputs.size() != 1) {  // exactly one input and one output are supported
    MS_LOG(EXCEPTION) << "input output size not support";
  }
  auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);
  auto input_addr = reinterpret_cast<float *>(inputs[0]->addr);
  if (input_shape_.size() > 2) {  // rank above 2: reduce over N and the trailing H*W dims
    size_t hw_size = 1;
    for (size_t i = 2; i < input_shape_.size(); ++i) {  // product of the dims after C
      hw_size *= input_shape_[i];
    }
    size_t c_size = input_shape_[1];
    for (size_t c = 0; c < c_size; ++c) {
      output_addr[c] = 0;  // reset the channel's accumulator
      for (size_t n = 0; n < input_shape_[0]; ++n) {
        size_t offset = n * c_size * hw_size + c * hw_size;  // start of block (n, c)
        for (size_t hw = 0; hw < hw_size; ++hw) {
          output_addr[c] += input_addr[offset + hw];  // accumulate over H*W
        }
      }
    }
  } else if (input_shape_.size() == 2) {  // rank exactly 2: reduce over N only
    for (size_t c = 0; c < input_shape_[1]; ++c) {
      output_addr[c] = 0;  // reset the channel's accumulator
      size_t n_offset = 0;
      for (size_t n = 0; n < input_shape_[0]; ++n) {  // accumulate column c over all rows
        output_addr[c] += input_addr[c + n_offset];
        n_offset += input_shape_[1];
      }
    }
  }
  return true;
}
}  // namespace kernel
}  // namespace mindspore
```
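On concrete numbers the reduction is easy to verify. A standalone sketch of the rank-above-2 branch, using the same offset arithmetic as the kernel:

```c++
#include <cstddef>
#include <iostream>
#include <vector>

// BiasAddGrad on an NCHW-style input: the channel-c gradient is the sum of
// dout over every (n, h*w) position of channel c.
int main() {
  // Input shape (N=2, C=2, HW=3), stored contiguously.
  size_t n_size = 2, c_size = 2, hw_size = 3;
  std::vector<float> dout(n_size * c_size * hw_size, 1.0f);  // all-ones gradient

  std::vector<float> db(c_size, 0.0f);
  for (size_t c = 0; c < c_size; ++c) {
    for (size_t n = 0; n < n_size; ++n) {
      size_t offset = n * c_size * hw_size + c * hw_size;  // same offset as the kernel
      for (size_t hw = 0; hw < hw_size; ++hw) {
        db[c] += dout[offset + hw];
      }
    }
  }
  std::cout << db[0] << ' ' << db[1] << '\n';  // 6 6: N*HW ones per channel
  return 0;
}
```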
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\bias_add_cpu_kernel.cc code annotation

```c++
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Project header
#include "backend/kernel_compiler/cpu/bias_add_cpu_kernel.h"

namespace mindspore {  // outer namespace
namespace kernel {     // nested namespace
void BiasAddCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);  // throw if the node is null
  input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);  // input shape
  bias_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);   // bias shape
  data_shape_ = input_shape_.size();                            // rank of the input
  // validity checks
  if (input_shape_.size() < 2) {  // the input rank must be at least 2 for BiasAdd
    MS_LOG(EXCEPTION) << "Input tensor's rank must be at least 2 for 'BiasAdd' Op, but input tensor's rank is "
                      << input_shape_.size();
  }
  if (bias_shape_.size() != 1) {  // the bias rank must be exactly 1
    MS_LOG(EXCEPTION) << "Bias's rank must be 1 for 'BiasAdd' Op, but bias' rank is" << bias_shape_.size();
  }
  if (input_shape_[1] != bias_shape_[0]) {  // the bias length must equal the C channel
    MS_LOG(EXCEPTION) << "Bias shape not match, bias shape must be equal to C channel's shape";
  }
}

bool BiasAddCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
                              const std::vector<AddressPtr> &outputs) {
  if (inputs.size() != 2 || outputs.size() != 1) {
    MS_LOG(EXCEPTION) << "inputs outputs size not support";  // unsupported input/output count
  }
  auto src_addr = reinterpret_cast<float *>(inputs[0]->addr);      // source data
  auto bias_addr = reinterpret_cast<float *>(inputs[1]->addr);     // bias data
  auto output_addr = reinterpret_cast<float *>(outputs[0]->addr);  // output data
  if (input_shape_.size() > 2) {
    size_t hw_size = 1;
    for (size_t i = 2; i < input_shape_.size(); ++i) {  // product of the dims after C
      hw_size *= input_shape_[i];
    }
    size_t c_size = input_shape_[1];
    for (size_t n = 0; n < input_shape_[0]; ++n) {
      for (size_t c = 0; c < c_size; ++c) {
        size_t offset = n * c_size * hw_size + c * hw_size;
        size_t hw = 0;
#ifdef ENABLE_AVX
        constexpr size_t C8NUM = 8;            // 8 floats per AVX register
        size_t hw8 = hw_size / C8NUM * C8NUM;  // largest multiple of 8 within hw_size
        const float *in_ptr = src_addr + offset;
        float *out_ptr = output_addr + offset;
        for (; hw < hw8; hw += C8NUM) {  // vectorized body: 8 adds at a time
          __m256 src_r1 = _mm256_loadu_ps(in_ptr);
          __m256 bias_r2 = _mm256_set1_ps(bias_addr[c]);
          __m256 dst_r3 = _mm256_add_ps(src_r1, bias_r2);
          _mm256_storeu_ps(out_ptr, dst_r3);
          in_ptr += C8NUM;
          out_ptr += C8NUM;
        }
#endif
        for (; hw < hw_size; ++hw) {  // scalar tail loop
          output_addr[offset + hw] = src_addr[offset + hw] + bias_addr[c];
        }
      }
    }
  } else {  // rank exactly 2: add the bias row by row
    size_t n_offset = 0;
    for (size_t n = 0; n < input_shape_[0]; ++n) {
      for (size_t c = 0; c < input_shape_[1]; ++c) {
        output_addr[n_offset + c] = src_addr[n_offset + c] + bias_addr[c];
      }
      n_offset += input_shape_[1];
    }
  }
  return true;
}
}  // namespace kernel
}  // namespace mindspore
```
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\assignadd_cpu_kernel.cc code annotation

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Project headers
#include "backend/kernel_compiler/cpu/mkldnn/assignadd_cpu_kernel.h"
#include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
#include "runtime/device/cpu/cpu_device_address.h"
#include "utils/ms_utils.h"
// Nested namespaces
namespace mindspore {
namespace kernel {
// initialize the kernel
void AssignAddCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);  // throw if the kernel node is null
  std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
  std::vector<size_t> src1_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
  if (src1_shape.size() == 0 && src0_shape.size() == 0) {  // two scalars: treat both as shape (1)
    src0_shape.insert(src0_shape.begin(), 1);
    src1_shape.insert(src1_shape.begin(), 1);
  }
  if (src0_shape.size() != src1_shape.size() && src1_shape.size() > 1) {
    // the kernel only supports same-rank inputs or tensor * scalar
    MS_LOG(EXCEPTION) << "AssignAdd only support same dim input or tensor * scalar " << src0_shape.size() << " vs "
                      << src1_shape.size();
  }
  if (src1_shape.size() < src0_shape.size()) {  // pad the lower-rank side with trailing 1s
    for (size_t i = src1_shape.size(); i < src0_shape.size(); ++i) {
      src1_shape.emplace_back(1);
    }
  }
  dnnl::memory::desc src0_desc = GetDefaultMemDesc(src0_shape);
  dnnl::memory::desc src1_desc = GetDefaultMemDesc(src1_shape);
  dnnl::binary::desc desc = dnnl::binary::desc(dnnl::algorithm::binary_add, src0_desc, src1_desc, src0_desc);
  auto prim_desc = dnnl::binary::primitive_desc(desc, MKLKernelEngine::Get().engine());
  primitive_ = std::make_shared<dnnl::binary>(prim_desc);
  AddArgument(DNNL_ARG_SRC_0, src0_desc);
  AddArgument(DNNL_ARG_SRC_1, src1_desc);
  AddArgument(DNNL_ARG_DST, src0_desc);
}

// run the kernel
bool AssignAddCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                const std::vector<kernel::AddressPtr> & /*workspace*/,
                                const std::vector<kernel::AddressPtr> &outputs) {
  if (inputs.size() < 2) {  // the kernel needs two inputs
    MS_LOG(EXCEPTION) << "AssignAdd error input output size!";
  }
  SetArgumentHandle(DNNL_ARG_SRC_0, inputs[0]->addr);
  SetArgumentHandle(DNNL_ARG_SRC_1, inputs[1]->addr);
  SetArgumentHandle(DNNL_ARG_DST, outputs[0]->addr);
  ExecutePrimitive();
  // copy the result back into the variable being assigned to
  auto ret = memcpy_s(inputs[0]->addr, inputs[0]->size, outputs[0]->addr, outputs[0]->size);
  if (ret != 0) {
    MS_LOG(EXCEPTION) << "Memcpy_s error, errorno " << ret;
    return false;
  }
  return true;
}
}  // namespace kernel
}  // namespace mindspore
```
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\assign_cpu_kernel.cc code annotation

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Project headers
#include "backend/kernel_compiler/cpu/assign_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
// Standard library headers (restored to match the facilities used below)
#include <map>
#include <string>

namespace mindspore {  // outer namespace
namespace kernel {     // nested namespace
static std::map<TypeId, size_t> input_x_dtype_size_map = {  // table of element sizes per data type
  {kNumberTypeBool, sizeof(bool)}, {kNumberTypeInt8, 1},    {kNumberTypeInt16, 2},   {kNumberTypeInt32, 4},
  {kNumberTypeInt64, 8},           {kNumberTypeUInt8, 1},   {kNumberTypeUInt16, 2},  {kNumberTypeUInt32, 4},
  {kNumberTypeUInt64, 8},          {kNumberTypeFloat16, 2}, {kNumberTypeFloat32, 4}, {kNumberTypeFloat64, 8}};

void AssignCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  auto input_x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);  // shape of x
  auto input_y_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);  // shape of y
  if (input_x_shape.size() != input_y_shape.size()) MS_LOG(EXCEPTION) << "x y must be same shape";
  for (size_t i = 0; i < input_x_shape.size(); ++i) {
    if (input_x_shape[i] != input_y_shape[i]) {  // every dim of x and y must match
      MS_LOG(EXCEPTION) << "x y must be same shape";
    }
    batch_size_ *= input_x_shape[i];  // total element count
  }
  input_x_dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);  // data type of x
  // look the type up in the size table; an unknown type is an error, otherwise store its element size
  if (input_x_dtype_size_map.find(input_x_dtype_) == input_x_dtype_size_map.end()) {
    MS_LOG(EXCEPTION) << "unsupported input_x dtype";
  }
  input_x_dtype_size_ = input_x_dtype_size_map[input_x_dtype_];
}

bool AssignCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
                             const std::vector<AddressPtr> &outputs) {
  auto max_size = inputs[0]->size;                        // capacity of the destination buffer
  size_t total_size = input_x_dtype_size_ * batch_size_;  // total bytes to copy
  if (total_size > max_size) {  // the copy must fit into the destination
    MS_LOG(EXCEPTION) << "Memcpy size must <= max_size, but got memcpy size is : " << total_size
                      << ", max size is : " << max_size;
  }
  int ret = memcpy_s(inputs[0]->addr, max_size, inputs[1]->addr, total_size);  // copy y into x
  if (ret != 0) {  // a nonzero return value means the copy failed
    MS_LOG(EXCEPTION) << "memcpy_s error, error no " << ret;
  }
  ret = memcpy_s(outputs[0]->addr, max_size, inputs[1]->addr, total_size);  // copy y into the output
  if (ret != 0) {  // a nonzero return value means the copy failed
    MS_LOG(EXCEPTION) << "memcpy_s error, error no " << ret;
  }
  return true;
}
}  // namespace kernel
}  // namespace mindspore
```
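A minimal sketch of the two ideas in this kernel: the dtype-to-byte-size table and the size guard before the copy. The two-value `TypeId` enum below is a stand-in for illustration, not MindSpore's real enum:

```c++
#include <cstddef>
#include <iostream>
#include <map>

enum TypeId { kFloat32, kInt64 };  // stand-in, not the real MindSpore TypeId

int main() {
  static std::map<TypeId, size_t> dtype_size = {{kFloat32, 4}, {kInt64, 8}};

  size_t batch_size = 16;
  size_t total_size = dtype_size.at(kFloat32) * batch_size;  // 64 bytes to copy
  size_t max_size = 64;                                      // destination buffer capacity
  if (total_size > max_size) {  // same guard as Launch: the copy must fit
    std::cerr << "Memcpy size must <= max_size\n";
    return 1;
  }
  std::cout << "copy " << total_size << " bytes\n";
  return 0;
}
```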
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\arithmetic_self_cpu_kernel.cc code annotation 3

```c++
// element-wise inverse hyperbolic sine
template <typename T>
void Asinh(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = asinh(in[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// element-wise inverse hyperbolic cosine
template <typename T>
void Acosh(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = acosh(in[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// element-wise inverse hyperbolic tangent
template <typename T>
void Atanh(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = atanh(in[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
}  // namespace

// table from primitive name to operation type
static const std::map<std::string, OperateType> kArithmeticOpTypeMap = {
  {prim::kPrimNeg->name(), NEG},           {prim::kPrimSquare->name(), SQUARE},
  {prim::kPrimOnesLike->name(), ONESLIKE}, {prim::kPrimZerosLike->name(), ZEROSLIKE},
  {prim::kPrimLogicalNot->name(), LOGICALNOT}, {prim::kPrimSign->name(), SIGN},
  {prim::kPrimFloor->name(), FLOOR},       {prim::kPrimRint->name(), RINT},
  {prim::kPrimRound->name(), ROUND},       {prim::kPrimReciprocal->name(), RECIPROCAL},
  {prim::kPrimGeLU->name(), GELU},         {prim::kPrimAsin->name(), ASIN},
  {prim::kPrimACos->name(), ACOS},         {prim::kPrimAtan->name(), ATAN},
  {prim::kPrimSin->name(), SIN},           {prim::kPrimCos->name(), COS},
  {prim::kPrimTan->name(), TAN},           {prim::kPrimSinh->name(), SINH},
  {prim::kPrimCosh->name(), COSH},         {prim::kPrimAsinh->name(), ASINH},
  {prim::kPrimAcosh->name(), ACOSH},       {prim::kPrimAtanh->name(), ATANH}};

void ArithmeticSelfCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);  // primitive name of the node
  operate_type_ = kArithmeticOpTypeMap.at(kernel_name);
  dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);  // input data type
  target_dtype_ = AnfAlgo::GetOutputInferDataType(kernel_node, 0);   // target (output) data type
}

// dispatch on the data type; an unsupported type raises an exception
bool ArithmeticSelfCPUKernel::Launch(const std::vector<AddressPtr> &inputs,
                                     const std::vector<AddressPtr> & /*workspace*/,
                                     const std::vector<AddressPtr> &outputs) {
  if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat16 || dtype_ == kNumberTypeFloat64) {
    LaunchKernel<float>(inputs, outputs);
  } else if (dtype_ == kNumberTypeInt32 || dtype_ == kNumberTypeInt16) {
    LaunchKernel<int>(inputs, outputs);
  } else if (dtype_ == kNumberTypeInt64) {
    LaunchKernel<int64_t>(inputs, outputs);
  } else if (dtype_ == kNumberTypeBool) {
    LaunchKernelLogic<bool>(inputs, outputs);
  } else {
    MS_LOG(EXCEPTION) << "Data type is " << TypeIdLabel(dtype_) << " is not support.";  // unsupported data type
  }
  return true;
}

template <typename T>
void ArithmeticSelfCPUKernel::LaunchKernelLogic(const std::vector<AddressPtr> &inputs,
                                                const std::vector<AddressPtr> &outputs) {
  T *input = reinterpret_cast<T *>(inputs[0]->addr);    // input buffer
  T *output = reinterpret_cast<T *>(outputs[0]->addr);  // output buffer
  size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
  LogicalNot(input, output, lens);
  return;
}

template <typename T>
void ArithmeticSelfCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
                                           const std::vector<AddressPtr> &outputs) {
  T *input = reinterpret_cast<T *>(inputs[0]->addr);
  T *output = reinterpret_cast<T *>(outputs[0]->addr);
  size_t lens = outputs[0]->size > 0 ? static_cast<size_t>(outputs[0]->size / sizeof(T)) : 1;
  // table from operation type to the element-wise function implementing it
  static const std::map<OperateType, std::function<void(const T *, T *, size_t)>> kArithmeticOpFuncMap = {
    {SQUARE, Square<T>},     {SIGN, Sign<T>},           {NEG, Neg<T>},     {LOGICALNOT, LogicalNot<T>},
    {ONESLIKE, OnesLike<T>}, {ZEROSLIKE, ZerosLike<T>}, {FLOOR, Floor<T>}, {RECIPROCAL, Reciprocal<T>},
    {GELU, Gelu<T>},         {SIN, Sin<T>},             {COS, Cos<T>},     {TAN, Tan<T>},
    {ASIN, Asin<T>},         {ACOS, ACos<T>},           {ATAN, Atan<T>},   {SINH, Sinh<T>},
    {COSH, Cosh<T>},         {ASINH, Asinh<T>},         {ACOSH, Acosh<T>}, {ATANH, Atanh<T>},
    {RINT, Rint<T>},         {ROUND, Round<T>}};
  if (kArithmeticOpFuncMap.find(operate_type_) != kArithmeticOpFuncMap.end()) {
    kArithmeticOpFuncMap.at(operate_type_)(input, output, lens);
  } else {
    MS_LOG(EXCEPTION) << "Not support " << operate_type_;  // unsupported operation type
  }
}
}  // namespace kernel
}  // namespace mindspore
```
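The `kArithmeticOpFuncMap` lookup in `LaunchKernel` is a classic function-table dispatch. A self-contained sketch of the pattern, using a hypothetical two-entry table with lambdas in place of the kernel's template functions:

```c++
#include <cstddef>
#include <functional>
#include <iostream>
#include <map>

// An enum-keyed table of element-wise functions, looked up once
// and then applied to the whole buffer.
enum OperateType { SQUARE, RECIPROCAL };

int main() {
  static const std::map<OperateType, std::function<void(const double *, double *, size_t)>> op_table = {
    {SQUARE, [](const double *in, double *out, size_t n) {
       for (size_t i = 0; i < n; ++i) out[i] = in[i] * in[i];
     }},
    {RECIPROCAL, [](const double *in, double *out, size_t n) {
       for (size_t i = 0; i < n; ++i) out[i] = 1.0 / in[i];
     }}};

  double in[3] = {1.0, 2.0, 4.0};
  double out[3] = {0};
  auto it = op_table.find(RECIPROCAL);  // same find-then-call shape as LaunchKernel
  if (it != op_table.end()) {
    it->second(in, out, 3);
  }
  std::cout << out[0] << ' ' << out[1] << ' ' << out[2] << '\n';  // 1 0.5 0.25
  return 0;
}
```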
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\arithmetic_self_cpu_kernel.cc code annotation 2

```c++
// round each element to the nearest integer (nearbyint) and cast the result back to T
template <typename T>
void Round(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = static_cast<T>(nearbyint(in[i]));
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// compute 1.0 / in[i] and cast the result back to T
template <typename T>
void Reciprocal(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = static_cast<T>(1.0 / in[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}

template <typename T>
void Gelu(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      T x = in[i];
      auto double_x = static_cast<T>(x);
      T tanh_res = (T)std::tanh(0.7978845608 * (double_x + 0.044715 * double_x * double_x * double_x));
      out[i] = x * ((T)1.0 + tanh_res) / (T)2.0;  // tanh approximation of GELU
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// element-wise arcsine
template <typename T>
void Asin(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = asin(in[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// element-wise arccosine
template <typename T>
void ACos(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = acos(in[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// element-wise arctangent
template <typename T>
void Atan(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = atan(in[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// element-wise sine
template <typename T>
void Sin(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = sin(in[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// element-wise cosine
template <typename T>
void Cos(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = cos(in[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// element-wise tangent
template <typename T>
void Tan(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = tan(in[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// element-wise hyperbolic sine
template <typename T>
void Sinh(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = sinh(in[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// element-wise hyperbolic cosine
template <typename T>
void Cosh(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = cosh(in[i]);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
```
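The constant 0.7978845608 in `Gelu` is sqrt(2/pi), so the loop body implements the tanh approximation gelu(x) = x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))) / 2. A standalone numeric sketch of the same formula:

```c++
#include <cmath>
#include <iostream>

// tanh approximation of GELU; 0.7978845608 is sqrt(2/pi), as in the kernel above.
double gelu(double x) {
  double tanh_res = std::tanh(0.7978845608 * (x + 0.044715 * x * x * x));
  return x * (1.0 + tanh_res) / 2.0;
}

int main() {
  for (double x : {-2.0, 0.0, 1.0, 2.0}) {
    std::cout << "gelu(" << x << ") = " << gelu(x) << '\n';
  }
  return 0;
}
```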
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\arithmetic_self_cpu_kernel.cc code annotation 1

```c++
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Standard library headers (restored to match the facilities used in this file)
#include <cmath>
#include <functional>
#include <map>
#include <string>
// Project headers
#include "backend/kernel_compiler/cpu/arithmetic_self_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"

namespace mindspore {  // outer namespace
namespace kernel {     // nested namespace
namespace {            // anonymous namespace
// element-wise square
template <typename T>
void Square(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = in[i] * in[i];
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// sign function: negatives map to -1, positives to 1, and 0 stays 0
template <typename T>
void Sign(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      if (in[i] < 0) {
        out[i] = -1;
      } else if (in[i] > 0) {
        out[i] = 1;
      } else {
        out[i] = 0;
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// element-wise negation
template <typename T>
void Neg(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = -in[i];
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// element-wise logical NOT
template <typename T>
void LogicalNot(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = !in[i];
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// fill the output with ones
template <typename T>
void OnesLike(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = static_cast<T>(1);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// fill the output with zeros
template <typename T>
void ZerosLike(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = static_cast<T>(0);
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// floor each element and cast the result back to T
template <typename T>
void Floor(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = static_cast<T>(floor(in[i]));
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
// round each element to the nearest integer (rint) and cast the result back to T
template <typename T>
void Rint(const T *in, T *out, size_t size) {
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      out[i] = static_cast<T>(rint(in[i]));
    }
  };
  CPUKernelUtils::ParallelFor(task, size);
}
```
# mindspore\mindspore\ccsrc\backend\kernel_compiler\cpu\arithmetic_logic_cpu_kernel.cc code annotation 2

```c++
template <typename T>
void ArithmeticLogicCPUKernel<T>::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
  // reserve two workspaces, one per broadcast input, constructed in place in the vector
  workspace_size_list_.emplace_back(output_size_ * sizeof(T));
  workspace_size_list_.emplace_back(output_size_ * sizeof(T));
}

template <typename T>
bool ArithmeticLogicCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs,
                                         const std::vector<AddressPtr> &workspace,
                                         const std::vector<AddressPtr> &outputs) {
  T *input1 = reinterpret_cast<T *>(inputs[0]->addr);         // first input
  T *input2 = reinterpret_cast<T *>(inputs[1]->addr);         // second input
  bool *output = reinterpret_cast<bool *>(outputs[0]->addr);  // boolean output
  T *broadcastedInput1 = reinterpret_cast<T *>(workspace[0]->addr);  // broadcast copy of input 1
  T *broadcastedInput2 = reinterpret_cast<T *>(workspace[1]->addr);  // broadcast copy of input 2
  Broadcast(broadcastedInput1, broadcastedInput2, input1, input2);
  // dispatch on the comparison / logic operation
  if (operate_type_ == LESS) {
    Less(broadcastedInput1, broadcastedInput2, output);
  } else if (operate_type_ == EQUAL) {
    Equal(broadcastedInput1, broadcastedInput2, output);
  } else if (operate_type_ == NOTEQUAL) {
    NotEqual(broadcastedInput1, broadcastedInput2, output);
  } else if (operate_type_ == GREATER) {
    Greater(broadcastedInput1, broadcastedInput2, output);
  } else if (operate_type_ == GREATEREQUAL) {
    GreaterEqual(broadcastedInput1, broadcastedInput2, output);
  } else if (operate_type_ == LESSEQUAL) {
    LessEqual(broadcastedInput1, broadcastedInput2, output);
  } else if (operate_type_ == LOGICALAND) {
    LogicalAnd(broadcastedInput1, broadcastedInput2, output);
  } else if (operate_type_ == LOGICALOR) {
    LogicalOr(broadcastedInput1, broadcastedInput2, output);
  } else {
    MS_LOG(EXCEPTION) << "Not support " << operate_type_;  // unsupported operation type
    return false;
  }
  return true;
}

template <typename T>
void ArithmeticLogicCPUKernel<T>::Broadcast(T *broadcastedInput1, T *broadcastedInput2, T *input1, T *input2) {
  for (size_t i = 0; i < output_size_; ++i) {  // for every output element
    size_t idx1 = 0;
    size_t idx2 = 0;
    GenIndex(i, &idx1, &idx2);  // map the flat output index back to source indices
    broadcastedInput1[i] = input1[idx1];
    broadcastedInput2[i] = input2[idx2];
  }
}

template <typename T>
void ArithmeticLogicCPUKernel<T>::GenIndex(size_t num, size_t *idx1, size_t *idx2) {
  std::vector<size_t> tmp;
  for (size_t i = 0; i < output_shape_.size() - 1; ++i) {  // decompose num into output coordinates
    if (output_element_num_[i] > num) {
      tmp.push_back(0);  // this dim contributes coordinate 0
    } else {
      tmp.push_back(num / output_element_num_[i]);
      num %= output_element_num_[i];
    }
  }
  tmp.push_back(num);  // the last coordinate is the remainder
  for (size_t k = 0; k < tmp.size() - 1; ++k) {
    if (input_shape1_[k] > 1) {
      *idx1 += tmp[k] * input_element_num1_[k];  // input-1 contribution of dim k
    }
    if (input_shape2_[k] > 1) {
      *idx2 += tmp[k] * input_element_num2_[k];  // input-2 contribution of dim k
    }
  }
  if (input_shape1_[tmp.size() - 1] > 1) {
    *idx1 += tmp[tmp.size() - 1];  // last-dim contribution for input 1
  }
  if (input_shape2_[tmp.size() - 1] > 1) {
    *idx2 += tmp[tmp.size() - 1];  // last-dim contribution for input 2
  }
}
}  // namespace kernel
}  // namespace mindspore
```
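`GenIndex` maps each flat output index back to input indices, with size-1 dims pinned to coordinate 0 so their single value is reused (that is the broadcast). The standalone sketch below expresses the same idea with explicit strides; `SourceIndex` is an illustrative helper, not the member function:

```c++
#include <cstddef>
#include <iostream>
#include <vector>

// Map a flat output index to the matching flat input index, treating
// size-1 input dims as "stuck at 0" so their value is reused.
size_t SourceIndex(size_t flat_out, const std::vector<size_t> &out_shape, const std::vector<size_t> &in_shape) {
  size_t idx = 0, stride = 1;
  // Walk dims from last to first, translating output coordinates to input offsets.
  for (int d = static_cast<int>(out_shape.size()) - 1; d >= 0; --d) {
    size_t coord = flat_out % out_shape[d];
    flat_out /= out_shape[d];
    if (in_shape[d] > 1) {  // non-broadcast dim: the coordinate contributes
      idx += coord * stride;
    }
    stride *= in_shape[d];  // the input stride only grows with real dims
  }
  return idx;
}

int main() {
  // Input shape (1, 3) broadcast against output shape (2, 3).
  std::vector<size_t> out_shape{2, 3}, in_shape{1, 3};
  for (size_t i = 0; i < 6; ++i) {
    std::cout << SourceIndex(i, out_shape, in_shape) << ' ';  // prints: 0 1 2 0 1 2
  }
  std::cout << '\n';
  return 0;
}
```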