From 9c829448ee0bac5a39c7b3bd1720e6987c73fea9 Mon Sep 17 00:00:00 2001 From: Eric Yu Date: Wed, 26 Dec 2018 17:32:34 +0800 Subject: [PATCH 1/7] add sophon backend --- CMakeLists.txt | 1 + lib/Backends/CMakeLists.txt | 8 + lib/Backends/Sophon/AllocationsInfo.h | 60 + .../Sophon/BM188x/BM1880AllocationsInfo.cpp | 142 ++ .../Sophon/BM188x/BM1880AllocationsInfo.h | 52 + lib/Backends/Sophon/BM188x/BM1880Backend.cpp | 296 ++++ lib/Backends/Sophon/BM188x/BM1880Backend.h | 82 ++ lib/Backends/Sophon/BM188x/BM1880CodeGen.cpp | 8 + lib/Backends/Sophon/BM188x/BM1880CodeGen.h | 24 + .../Sophon/BM188x/BM1880CodeGenBMK.cpp | 905 +++++++++++++ lib/Backends/Sophon/BM188x/BM1880CodeGenBMK.h | 52 + .../Sophon/BM188x/BM1880DumpAllPass.cpp | 77 ++ .../Sophon/BM188x/BM1880DumpAllPass.h | 9 + .../Sophon/BM188x/BM1880ExpandSophonInst.cpp | 363 +++++ .../Sophon/BM188x/BM1880ExpandSophonInst.h | 20 + .../Sophon/BM188x/BM1880HandleReshapePass.cpp | 89 ++ .../Sophon/BM188x/BM1880HandleReshapePass.h | 9 + .../BM188x/BM1880InsertLoadStorePass.cpp | 81 ++ .../Sophon/BM188x/BM1880InsertLoadStorePass.h | 9 + lib/Backends/Sophon/BM188x/BM1880Instr.def | 15 + .../Sophon/BM188x/BM1880MemoryAllocPass.cpp | 41 + .../Sophon/BM188x/BM1880MemoryAllocPass.h | 10 + .../BM188x/BM1880TargetTransformInfo.cpp | 84 ++ .../Sophon/BM188x/BM1880TargetTransformInfo.h | 33 + .../Sophon/BM188x/BM188xLMemSizeVisitor.cpp | 256 ++++ .../Sophon/BM188x/BM188xLMemSizeVisitor.h | 72 + lib/Backends/Sophon/BM188x/CMakeLists.txt | 13 + lib/Backends/Sophon/Bundle.cpp | 169 +++ lib/Backends/Sophon/Bundle.h | 57 + lib/Backends/Sophon/CMakeLists.txt | 58 + lib/Backends/Sophon/CommandLine.cpp | 17 + lib/Backends/Sophon/CommandLine.h | 21 + lib/Backends/Sophon/GlowLIRVisitor.cpp | 41 + lib/Backends/Sophon/GlowLIRVisitor.h | 30 + lib/Backends/Sophon/SophonBackend.cpp | 51 + lib/Backends/Sophon/SophonBackend.h | 62 + lib/Backends/Sophon/SophonFunction.cpp | 114 ++ lib/Backends/Sophon/SophonFunction.h | 39 + 
lib/Backends/Sophon/SophonQuantizer.cpp | 190 +++ lib/Backends/Sophon/SophonQuantizer.h | 59 + .../Sophon/SophonTargetTransformInfo.h | 29 + lib/Backends/Sophon/Utility/CMakeLists.txt | 3 + lib/Backends/Sophon/Utility/memory.cpp | 23 + lib/Backends/Sophon/Utility/memory.h | 74 + tests/unittests/CMakeLists.txt | 4 + .../Sophon/Backends/BM1880CodeGenTest.cpp | 525 ++++++++ .../Backends/BM1880DeleteQuantizeNodeTest.cpp | 49 + .../Sophon/Backends/BM1880Expand.cpp | 445 ++++++ .../Backends/BM1880ExpandCodeGenTest.cpp | 288 ++++ .../Sophon/Backends/BM1880GetLMemSizeTest.cpp | 94 ++ .../Sophon/Backends/BM1880HIRSliceTest.cpp | 162 +++ .../Sophon/Backends/BM1880MemoryAllocTest.cpp | 149 +++ .../unittests/Sophon/Backends/CMakeLists.txt | 39 + .../Sophon/Backends/LoadStoreTest.cpp | 117 ++ tests/unittests/Sophon/CMakeLists.txt | 18 + tests/unittests/Sophon/testMain.cpp | 8 + tools/ClassGen/Backends/Sophon/CMakeLists.txt | 7 + tools/ClassGen/Backends/Sophon/SophonMI.h | 118 ++ .../ClassGen/Backends/Sophon/SophonOpInstrs.h | 10 + .../Backends/Sophon/SophonSpecificInstrs.h | 1189 +++++++++++++++++ .../Sophon/SophonSpecificInstrsVerification.h | 63 + .../Backends/Sophon/SophonSpecificNodes.h | 636 +++++++++ .../Sophon/SophonSpecificNodesVerification.h | 92 ++ tools/ClassGen/CMakeLists.txt | 4 + tools/ClassGen/InstrGen.cpp | 1 + tools/ClassGen/NodeGen.cpp | 1 + 66 files changed, 7867 insertions(+) create mode 100644 lib/Backends/Sophon/AllocationsInfo.h create mode 100644 lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.cpp create mode 100644 lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.h create mode 100644 lib/Backends/Sophon/BM188x/BM1880Backend.cpp create mode 100644 lib/Backends/Sophon/BM188x/BM1880Backend.h create mode 100644 lib/Backends/Sophon/BM188x/BM1880CodeGen.cpp create mode 100644 lib/Backends/Sophon/BM188x/BM1880CodeGen.h create mode 100644 lib/Backends/Sophon/BM188x/BM1880CodeGenBMK.cpp create mode 100644 lib/Backends/Sophon/BM188x/BM1880CodeGenBMK.h create 
mode 100644 lib/Backends/Sophon/BM188x/BM1880DumpAllPass.cpp create mode 100644 lib/Backends/Sophon/BM188x/BM1880DumpAllPass.h create mode 100644 lib/Backends/Sophon/BM188x/BM1880ExpandSophonInst.cpp create mode 100644 lib/Backends/Sophon/BM188x/BM1880ExpandSophonInst.h create mode 100644 lib/Backends/Sophon/BM188x/BM1880HandleReshapePass.cpp create mode 100644 lib/Backends/Sophon/BM188x/BM1880HandleReshapePass.h create mode 100644 lib/Backends/Sophon/BM188x/BM1880InsertLoadStorePass.cpp create mode 100644 lib/Backends/Sophon/BM188x/BM1880InsertLoadStorePass.h create mode 100644 lib/Backends/Sophon/BM188x/BM1880Instr.def create mode 100644 lib/Backends/Sophon/BM188x/BM1880MemoryAllocPass.cpp create mode 100644 lib/Backends/Sophon/BM188x/BM1880MemoryAllocPass.h create mode 100644 lib/Backends/Sophon/BM188x/BM1880TargetTransformInfo.cpp create mode 100644 lib/Backends/Sophon/BM188x/BM1880TargetTransformInfo.h create mode 100644 lib/Backends/Sophon/BM188x/BM188xLMemSizeVisitor.cpp create mode 100644 lib/Backends/Sophon/BM188x/BM188xLMemSizeVisitor.h create mode 100644 lib/Backends/Sophon/BM188x/CMakeLists.txt create mode 100644 lib/Backends/Sophon/Bundle.cpp create mode 100644 lib/Backends/Sophon/Bundle.h create mode 100644 lib/Backends/Sophon/CMakeLists.txt create mode 100644 lib/Backends/Sophon/CommandLine.cpp create mode 100644 lib/Backends/Sophon/CommandLine.h create mode 100644 lib/Backends/Sophon/GlowLIRVisitor.cpp create mode 100644 lib/Backends/Sophon/GlowLIRVisitor.h create mode 100644 lib/Backends/Sophon/SophonBackend.cpp create mode 100644 lib/Backends/Sophon/SophonBackend.h create mode 100644 lib/Backends/Sophon/SophonFunction.cpp create mode 100644 lib/Backends/Sophon/SophonFunction.h create mode 100644 lib/Backends/Sophon/SophonQuantizer.cpp create mode 100644 lib/Backends/Sophon/SophonQuantizer.h create mode 100644 lib/Backends/Sophon/SophonTargetTransformInfo.h create mode 100644 lib/Backends/Sophon/Utility/CMakeLists.txt create mode 100644 
lib/Backends/Sophon/Utility/memory.cpp create mode 100644 lib/Backends/Sophon/Utility/memory.h create mode 100644 tests/unittests/Sophon/Backends/BM1880CodeGenTest.cpp create mode 100644 tests/unittests/Sophon/Backends/BM1880DeleteQuantizeNodeTest.cpp create mode 100644 tests/unittests/Sophon/Backends/BM1880Expand.cpp create mode 100644 tests/unittests/Sophon/Backends/BM1880ExpandCodeGenTest.cpp create mode 100644 tests/unittests/Sophon/Backends/BM1880GetLMemSizeTest.cpp create mode 100644 tests/unittests/Sophon/Backends/BM1880HIRSliceTest.cpp create mode 100644 tests/unittests/Sophon/Backends/BM1880MemoryAllocTest.cpp create mode 100644 tests/unittests/Sophon/Backends/CMakeLists.txt create mode 100644 tests/unittests/Sophon/Backends/LoadStoreTest.cpp create mode 100644 tests/unittests/Sophon/CMakeLists.txt create mode 100644 tests/unittests/Sophon/testMain.cpp create mode 100644 tools/ClassGen/Backends/Sophon/CMakeLists.txt create mode 100644 tools/ClassGen/Backends/Sophon/SophonMI.h create mode 100644 tools/ClassGen/Backends/Sophon/SophonOpInstrs.h create mode 100644 tools/ClassGen/Backends/Sophon/SophonSpecificInstrs.h create mode 100644 tools/ClassGen/Backends/Sophon/SophonSpecificInstrsVerification.h create mode 100644 tools/ClassGen/Backends/Sophon/SophonSpecificNodes.h create mode 100644 tools/ClassGen/Backends/Sophon/SophonSpecificNodesVerification.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b3985dd54c..ac2f991588 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,7 @@ enable_testing() option(GLOW_WITH_CPU "Build the LLVM-based JIT CPU backend" ON) option(GLOW_WITH_OPENCL "Build the OpenCL backend" ON) +option(GLOW_WITH_SOPHON "Build the Sophon backend" ON) option(GLOW_BUILD_EXAMPLES "Build the examples" ON) option(GLOW_BUILD_TESTS "Build the tests" ON) option(GLOW_WITH_BUNDLES "Build bundles" OFF) diff --git a/lib/Backends/CMakeLists.txt b/lib/Backends/CMakeLists.txt index 860da68d8c..402fb6405d 100644 --- a/lib/Backends/CMakeLists.txt 
+++ b/lib/Backends/CMakeLists.txt @@ -1,3 +1,5 @@ +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..) + add_library(Backends Backends.cpp) add_library(BackendUtils BackendUtils.cpp) @@ -20,6 +22,12 @@ if(GLOW_WITH_CPU) LIST(APPEND linked_backends CPUBackend) endif() + +if(GLOW_WITH_SOPHON) + add_subdirectory(Sophon) + LIST(APPEND linked_backends Sophon) +endif() + target_link_libraries(Backends PRIVATE BackendUtils diff --git a/lib/Backends/Sophon/AllocationsInfo.h b/lib/Backends/Sophon/AllocationsInfo.h new file mode 100644 index 0000000000..b491c3573b --- /dev/null +++ b/lib/Backends/Sophon/AllocationsInfo.h @@ -0,0 +1,60 @@ +/** + * Copyright (c) 2017-present, Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef GLOW_BACKENDS_CPU_ALLOCATIONSINFO_H +#define GLOW_BACKENDS_CPU_ALLOCATIONSINFO_H + +#include "llvm/ADT/DenseMap.h" + +namespace glow { +class Value; +class IRFunction; +class Context; + +/// Information about allocations for activations, constant weight variables +/// and mutable weight variables. +class AllocationsInfo { + +public: + virtual ~AllocationsInfo() = default; + /// Assign offsets to all of the variables in the module \p M and to the + /// placeholders. \p ctx is the context that maps the graph to the concrete + /// execution environment for a specific function. 
+ /// If the \p absoluteAddr is true, simply reuse the addresses already used + /// by the payloads of tensors corresponding to those WeightVars as offsets. + /// This is useful in a JIT setup. If \p absoluteAddr is false, then all the + /// WeightVars will get new offsets assigned. + virtual void allocateWeightVars(const IRFunction *F) = 0; + /// Assign offsets to all activations. + /// No actual memory allocation is performed. All the allocations should be + /// performed by the client based on the information provided by the + /// AllocationsInfo. + virtual void allocateActivations(const IRFunction *F) = 0; + /// Assign offsets to all tensorviews. + /// No memory allocation is performed. Sets up all offsets into already + /// defined offsets for WeightVars and AllocActivations. Assumes the weight + /// vars and alloc activations have already been added to allocatedAddressed_. + virtual void allocateTensorViews(const IRFunction *F) = 0; + /// Number all allocations and weight variables by assigning them unique + /// numbers. 
+ virtual void numberValues(const IRFunction *F) = 0; + + virtual llvm::DenseMap &getAllocatedAddress() = 0; + virtual void setActivationsMemSize(size_t v) = 0; + virtual size_t getActivationsMemSize() = 0; +}; + +} // namespace glow +#endif // GLOW_BACKENDS_CPU_ALLOCATIONSINFO_H diff --git a/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.cpp b/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.cpp new file mode 100644 index 0000000000..b490c68aba --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.cpp @@ -0,0 +1,142 @@ +#define DEBUG_TYPE "bm1880-alloc" + +#include "BM1880AllocationsInfo.h" +#include "glow/CodeGen/MemoryAllocator.h" +#include "glow/Graph/Context.h" +#include "glow/Graph/Graph.h" +#include "glow/Graph/Nodes.h" +#include "glow/IR/IRUtils.h" +#include "glow/IR/Instrs.h" +#include "glow/Support/Debug.h" + +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace glow; +using llvm::cast; +using llvm::dyn_cast; +using llvm::isa; + +BM1880AllocationsInfo::BM1880AllocationsInfo() : ctx_(nullptr), TTI_(nullptr) {} + +BM1880AllocationsInfo::BM1880AllocationsInfo( + const Context &ctx, const sophon::SophonTargetTransformInfo *TTI) + : ctx_(&ctx), TTI_(TTI) {} + +void BM1880AllocationsInfo::allocateWeightVars(const IRFunction *F) { + + size_t weightOffset = 0; + // Compute the new offsets for all the weights, do not reuse their current + // addresses. Process all constant WeightVars first. 
+ for (auto &v : F->getGraph()->getParent()->getConstants()) { + assert(isa(F->getWeightForNode(v))); + auto *w = cast(F->getWeightForNode(v)); + auto numBytes = w->getSizeInBytes(); + // size_t addr = constantWeightVarsAllocator.allocate(numBytes, w); + size_t addr = weightOffset; + weightOffset += numBytes; + allocatedAddressed_[w] = addr; + + DEBUG_GLOW(llvm::errs() << "Allocated global weight " << w->getName() + << " size: " << numBytes << " address range: [" + << addr << ", " << addr + numBytes << "]\n"); + } + + // Remember that max required memory size for each kind of weights. + globa_weight_sizes_ = weightOffset; +} + +void BM1880AllocationsInfo::allocateNeuron(WeightVar *W, size_t &neuronOffset) { + auto numBytes = W->getSizeInBytes(); + // size_t addr = activationsAllocator.allocate(numBytes, w); + size_t addr = neuronOffset; + neuronOffset += numBytes; + allocatedAddressed_[W] = addr; + + DEBUG_GLOW(llvm::errs() << "Allocated global input/output " << W->getName() + << " size: " << numBytes << " address range: [" + << addr << ", " << addr + numBytes << "]\n"); +} + +void BM1880AllocationsInfo::allocateActivations(const IRFunction *F) { + // Maps activations and views to some offset within the heap. + llvm::DenseMap activationAddr; + + // global offset start from 0, input data need to start from 0 + size_t neuronOffset = 0; + // allocate input + for (auto &v : F->getGraph()->getParent()->getPlaceholders()) { + assert(isa(F->getWeightForNode(v))); + // unfortunately we need to use prefix to separate input/output + if (v->getName().find("save_") != llvm::StringRef::npos) + continue; + + auto *w = cast(F->getWeightForNode(v)); + allocateNeuron(w, neuronOffset); + } + + // alloc output. 
+ for (auto &v : F->getGraph()->getParent()->getPlaceholders()) { + assert(isa(F->getWeightForNode(v))); + if (v->getName().find("save_") == llvm::StringRef::npos) + continue; + auto *w = cast(F->getWeightForNode(v)); + allocateNeuron(w, neuronOffset); + } + + global_neuron_sizes_ = neuronOffset; + + MemoryAllocator activationsAllocator("activations", + TTI_->getLocalMemSizeInBytes()); + + // Assign device-space addresses to the activations. + for (auto &I : F->getInstrs()) { + if (auto *A = dyn_cast(&I)) { + // FIXME: getLMemSizeFromValue cannot accept a const value currently + auto *nonConstI = const_cast(&I); + auto numBytes = TTI_->getLMemSizeFromValue(nonConstI); + + // The default MemoryAllocator is always 64-byte aligned, which already + // meets our eu_num(16) alignment requirement. + // TODO: this is not efficient for memory usage; support unaligned alloc. + uint64_t addr = activationsAllocator.allocate(numBytes, A); + assert(!activationAddr.count(A) && "Allocation already made!"); + assert(MemoryAllocator::npos != addr); + activationAddr[A] = addr; + continue; + } + + if (auto *D = dyn_cast(&I)) { + auto *A = D->getAlloc(); + assert(activationAddr.count(A) && "Invalid deallocation!"); + activationsAllocator.deallocate(A); + continue; + } + } + + local_memory_sizes_ = activationsAllocator.getMaxMemoryUsage(); + + // Register specific addresses within the heap to activations. 
+ for (auto &A : activationAddr) { + allocatedAddressed_[A.first] = A.second; + uint64_t size = + TTI_->getLMemSizeFromValue(const_cast(A.first)); + DEBUG_GLOW(llvm::errs() << "Allocated activation " << A.first->getName() + << " size: " << size << " address range: [" + << allocatedAddressed_[A.first] << ", " + << allocatedAddressed_[A.first] + size << "]\n";); + } +} + +llvm::DenseMap & +BM1880AllocationsInfo::getAllocatedAddress() { + return allocatedAddressed_; +} + +void BM1880AllocationsInfo::setActivationsMemSize(size_t v) { + global_neuron_sizes_ = v; +} + +size_t BM1880AllocationsInfo::getActivationsMemSize() { + return global_neuron_sizes_; +} diff --git a/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.h b/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.h new file mode 100644 index 0000000000..276b7f9533 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.h @@ -0,0 +1,52 @@ +#pragma once + +#include "Backends/Sophon/AllocationsInfo.h" +#include "Backends/Sophon/SophonTargetTransformInfo.h" +#include "glow/CodeGen/MemoryAllocator.h" +#include "llvm/ADT/DenseMap.h" + +namespace glow { + +/// Information about allocations for activations, constant weight variables +/// and mutable weight variables. +class BM1880AllocationsInfo : public AllocationsInfo { + +private: + void allocateNeuron(WeightVar *W, size_t &neuronOffset); + +private: + /// Different kinds of values that need to be allocated. + enum class ValueKind { Global_Weight, Global_Neuron, Local_Memory }; + + /// Maps Values in the module to their offsets. + llvm::DenseMap allocatedAddressed_; + /// Amount of memory to be allocated for constant WeightVars. + size_t globa_weight_sizes_{0}; + /// Amount of memory to be allocated for mutable WeightVars. + size_t global_neuron_sizes_{0}; + /// Amount of memory to be allocated for activations. 
+ size_t local_memory_sizes_{0}; + + const Context *ctx_; + const sophon::SophonTargetTransformInfo *TTI_; + +public: + BM1880AllocationsInfo(); + BM1880AllocationsInfo(const Context &ctx, + const sophon::SophonTargetTransformInfo *TTI); + void allocateWeightVars(const IRFunction *F) override; + void allocateActivations(const IRFunction *F) override; + void allocateTensorViews(const IRFunction *F) override { + llvm_unreachable("unsupported!"); + } + /// Number all allocations and weight variables by assigning them unique + /// numbers. + void numberValues(const IRFunction *F) override { llvm_unreachable("TODO!"); } + +public: + llvm::DenseMap &getAllocatedAddress() override; + void setActivationsMemSize(size_t v) override; + size_t getActivationsMemSize() override; +}; + +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880Backend.cpp b/lib/Backends/Sophon/BM188x/BM1880Backend.cpp new file mode 100644 index 0000000000..487ec37265 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880Backend.cpp @@ -0,0 +1,296 @@ +/** + * Copyright (c) 2017-present, Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "BM1880Backend.h" +#include "BM1880AllocationsInfo.h" +#include "BM1880CodeGen.h" +#include "BM1880DumpAllPass.h" +#include "BM1880ExpandSophonInst.h" +#include "BM1880HandleReshapePass.h" +#include "BM1880InsertLoadStorePass.h" +#include "BM1880MemoryAllocPass.h" +#include "BM1880TargetTransformInfo.h" +#include "Backends/Sophon/Bundle.h" +#include "Backends/Sophon/CommandLine.h" +#include "Backends/Sophon/SophonFunction.h" +#include "glow/Graph/Context.h" +#include "glow/Graph/NodeValue.h" +#include "glow/Optimizer/Optimizer.h" +#include "glow/Support/Debug.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "bm1880_backend" + +using namespace glow; +using llvm::cast; +using llvm::dyn_cast; + +extern llvm::cl::opt target; +static llvm::cl::opt dumpAll("dump-all-neuron", + llvm::cl::desc("dump all neuron"), + llvm::cl::init(false)); + +std::unique_ptr +BM1880Backend::codegen(std::unique_ptr IR, + AllocationsInfo *allocationsInfo) const { + auto model = Bundle(this, *allocationsInfo).codegen(IR.get()); + return llvm::make_unique(std::move(model)); +} + +std::unique_ptr +BM1880Backend::compileIR(std::unique_ptr IR, + const Context &ctx) const { + BM1880AllocationsInfo allocationsInfo(ctx, getTTI()); + runOptimizationPasses(IR.get(), &allocationsInfo); + return codegen(std::move(IR), &allocationsInfo); +} + +std::unique_ptr +BM1880Backend::compile(Function *F, const Context &ctx) const { + auto IR = generateAndOptimizeIR(F, true /*shouldShareBuffers*/); + return compileIR(std::move(IR), ctx); +} + +void BM1880Backend::save(Function *F, llvm::StringRef outputDir, + llvm::StringRef networkName) const { + auto IR = generateAndOptimizeIR(F, true /*shouldShareBuffers*/); + Context ctx; + BM1880AllocationsInfo allocationsInfo(ctx, getTTI()); + runOptimizationPasses(IR.get(), &allocationsInfo); + auto b = Bundle(this, allocationsInfo); + auto model = b.codegen(IR.get()); + Bundle::saveBmodelFile(std::move(model), 
outputDir); +} + +bool BM1880Backend::isOpSupported(Kinded::Kind opKind, + ElemKind elementTy) const { + // Check for quantization support. + if (elementTy == ElemKind::Int8QTy) { + switch (opKind) { + case Kinded::Kind::SophonConvolutionNodeKind: + case Kinded::Kind::FullyConnectedNodeKind: + case Kinded::Kind::SophonReluNodeKind: + case Kinded::Kind::ReshapeNodeKind: + case Kinded::Kind::SophonMaxPoolNodeKind: + return true; + default: + return false; + } + } + return false; +} + +bool BM1880Backend::shouldLower(const Node *N) const { + switch (N->getKind()) { + default: + return true; + case Kinded::Kind::ConvolutionNodeKind: + case Kinded::Kind::FullyConnectedNodeKind: + case Kinded::Kind::ReluNodeKind: + case Kinded::Kind::BatchNormalizationNodeKind: + return false; + } +} + +static void reorderConvWeight(const SophonConvolutionQ8Inst *Inst, + std::vector &Vec) { + std::vector ref_data = Vec; + + auto in_dim = Inst->getSrc()->getType()->dims(); + auto out_dim = Inst->getDest()->getType()->dims(); + auto kern_dim = Inst->getFilter()->getType()->dims(); + + const int oc = out_dim[1]; + const int kh = kern_dim[2]; + const int kw = kern_dim[3]; + // support group size > 1? 
+ const int group_size = 1; + const int ic = in_dim[1] / group_size; + // conv weight is arranged by (1, oc, kh*kw, ic) + // convert (oc, ic, kh, kw) to (1, oc, kh*kw, ic) + for (int oc_i = 0; oc_i < oc; ++oc_i) { + for (int k_i = 0; k_i < kh * kw; ++k_i) { + for (int ic_i = 0; ic_i < ic; ++ic_i) { + int to = oc_i * (ic * kh * kw) + k_i * ic + ic_i; + int from = oc_i * (ic * kh * kw) + ic_i * (kh * kw) + k_i; + Vec[to] = ref_data[from]; + } + } + } +} + +static void reorder16bit(std::vector &Vec) { + std::vector ref_data = Vec; + assert(Vec.size() % 2 == 0); + size_t count = Vec.size() / 2; + for (size_t i = 0; i < count; ++i) { + Vec[i] = ref_data[i * 2]; + Vec[i + count] = ref_data[i * 2 + 1]; + } +} + +void BM1880Backend::reorderWeights(IRFunction *F) const { + for (auto &v : F->getGraph()->getParent()->getConstants()) { + auto *w = cast(F->getWeightForNode(v)); + auto numBytes = w->getSizeInBytes(); + auto payload = v->getPayload().getUnsafePtr(); + + if (w->getElementType() == glow::ElemKind::Int8QTy) { + if (auto *conv = + dyn_cast(w->getUsers().begin()->get())) { + std::vector orig_weight((uint8_t *)payload, + (uint8_t *)(payload + numBytes)); + reorderConvWeight(conv, orig_weight); + memcpy(payload, orig_weight.data(), numBytes); + } + } else if (w->getElementType() == glow::ElemKind::Int16QTy) { + std::vector orig_weight((uint8_t *)payload, + (uint8_t *)(payload + numBytes)); + reorder16bit(orig_weight); + memcpy(payload, orig_weight.data(), numBytes); + } else { + llvm_unreachable("unsupport type!"); + } + } +} + +void BM1880Backend::generateWeights(IRFunction *F, + AllocationsInfo &allocationsInfo, + std::vector &weights) const { + size_t weights_total_bytes = 0; + for (auto &v : F->getGraph()->getParent()->getConstants()) { + auto *w = cast(F->getWeightForNode(v)); + weights_total_bytes += w->getSizeInBytes(); + } + + weights.resize(weights_total_bytes); + + DEBUG_GLOW(llvm::dbgs() << "generateWeights:\n"); + for (auto &v : 
F->getGraph()->getParent()->getConstants()) { + auto *w = cast(F->getWeightForNode(v)); + auto numBytes = w->getSizeInBytes(); + auto payload = v->getPayload().getUnsafePtr(); + auto addr = allocationsInfo.getAllocatedAddress()[w]; + memcpy(&(weights.data()[addr]), (uint8_t *)payload, numBytes); + DEBUG_GLOW(llvm::dbgs() + << "weights[" << addr << "]=" << w->getName() << "\n"); + } +} + +void BM1880Backend::codeGenCmdbuf(IRFunction *F, + AllocationsInfo &allocationsInfo, + SophonCmdBuf &cmdbuf) const { + auto codegen = BM1880CodeGen::createCodeGen(F, allocationsInfo); + codegen->performCodeGen(); + cmdbuf = codegen->getCmdbuf(); +} + +void BM1880Backend::runOptimizationPasses( + IRFunction *IR, BM1880AllocationsInfo *allocationsInfo) const { + sophon::runHandleReshape(IR); + reorderWeights(IR); // before insert + sophon::runInsertLoadStorePass(IR); + if (dumpAll) + sophon::runDumpAllPass(IR); + glow::optimize(*IR, true /*shouldShareBuffers*/); + sophon::runMemoryAllocPass(IR, allocationsInfo); + BM1880ExpandSophonInst(IR, *allocationsInfo).run(); +} + +sophon::SophonTargetTransformInfo *BM1880Backend::getTTI() const { + return sophon::BM1880TargetTransformInfo::getInstance(); +} + +bool BM1880Backend::deleteQuantizeNodes(Function *F) const { + auto *module = F->getParent(); + + DEBUG_GLOW(F->dump()); + { + DEBUG_GLOW(PlaceholderList &all_PHs_ = module->getPlaceholders(); + llvm::dbgs() << "init:\n"; for (auto &ph + : all_PHs_) { + llvm::dbgs() << "PH: " << ph->getName() << "\n"; + }); + } + + bool changed = false; + llvm::SmallPtrSet eraseNodes; + // map[oldPH] = newPH + std::map PH_map; + for (auto &n : F->getNodes()) { + if (n.getKind() == Kinded::Kind::QuantizeNodeKind) { + // create new PH and delete quantize node + auto *QN = llvm::cast(&n); + auto old_PH = cast(QN->getInput()); + auto *new_PH = + module->createPlaceholder(QN->getType(0), old_PH->getName(), false); + QN->getResult().replaceAllUsesOfWith(new_PH, F); + eraseNodes.insert(QN); + changed = true; + 
+ PH_map.insert({old_PH, new_PH}); + } else if (n.getKind() == Kinded::Kind::DequantizeNodeKind) { + // Assumes the first user is a Save node; create a new Save with Q8 type + // and delete the Dequantize and old Save nodes. + auto *DQN = llvm::cast(&n); + auto *SN = llvm::cast(DQN->getUsers().begin()->getUser()); + auto old_PH = SN->getPlaceholder(); + auto *new_save = F->createSave(old_PH->getName(), DQN->getInput()); + eraseNodes.insert(SN); + eraseNodes.insert(DQN); + PH_map.insert({old_PH, new_save->getPlaceholder()}); + changed = true; + } + } + + { + DEBUG_GLOW(PlaceholderList &all_PHs_ = module->getPlaceholders(); + llvm::dbgs() << "before:\n"; for (auto &ph + : all_PHs_) { + llvm::dbgs() << "PH: " << ph->getName() << "\n"; + }); + } + + // Delete the old PHs and give each new PH the old PH's name. + PlaceholderList &all_PHs = module->getPlaceholders(); + for (auto &map : PH_map) { + std::string old_name = map.first->getName(); + map.second->setName(old_name); + all_PHs.remove(map.first); + delete (map.first); + } + + { + DEBUG_GLOW(PlaceholderList &all_PHs_ = module->getPlaceholders(); + llvm::dbgs() << "after:\n"; for (auto &ph + : all_PHs_) { + llvm::dbgs() << "PH: " << ph->getName() << "\n"; + }); + } + + for (auto *node : eraseNodes) { + F->eraseNode(node); + } + + DEBUG_GLOW(F->dump()); + return changed; +} + +bool BM1880Backend::transformPreLowering(Function *F, + CompilationMode mode) const { + bool changed; + changed = deleteQuantizeNodes(F); + return changed; +} diff --git a/lib/Backends/Sophon/BM188x/BM1880Backend.h b/lib/Backends/Sophon/BM188x/BM1880Backend.h new file mode 100644 index 0000000000..a37e848df2 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880Backend.h @@ -0,0 +1,82 @@ +/** + * Copyright (c) 2017-present, Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "Backends/Sophon/AllocationsInfo.h" +#include "Backends/Sophon/SophonBackend.h" + +#include "glow/Backends/CompiledFunction.h" +#include "glow/Graph/Node.h" + +namespace glow { + +class BM1880AllocationsInfo; + +/// This is the Sophon backend. +class BM1880Backend final : public SophonBackend { +public: + BM1880Backend() = default; + + ~BM1880Backend() override = default; + + std::unique_ptr + compileIR(std::unique_ptr IR, const Context &ctx) const override; + std::unique_ptr + codegen(std::unique_ptr IR, + AllocationsInfo *allocationsInfo) const; + + void runOptimizationPasses(IRFunction *IR, + BM1880AllocationsInfo *allocationsInfo) const; + + /// JIT Mode: compile to FunctionCompiled + std::unique_ptr compile(Function *F, + const Context &ctx) const override; + + /// AOT Mode: save to bmodel + void save(Function *F, llvm::StringRef outputDir, + llvm::StringRef networkName) const override; + + bool transformPreLowering(Function *F, CompilationMode mode) const override; + + bool transformPostLowering(Function *F, CompilationMode mode) const override { + return false; + }; + + bool isOpSupported(Kinded::Kind opKind, ElemKind elementTy) const override; + + bool shouldLower(const Node *N) const override; + + uint32_t getTarget() const override { return 1880; } + + void reorderWeights(IRFunction *F) const; + + void generateWeights(IRFunction *F, AllocationsInfo &allocationsInfo, + std::vector &weights) const override; + + void codeGenCmdbuf(IRFunction *F, AllocationsInfo &allocationsInfo, + SophonCmdBuf &cmdbuf) const 
override; + + // delete quantize/dequantize nodes + bool deleteQuantizeNodes(Function *F) const; + + // feature + bool hasFP32Inst() const override { return false; } + bool hasInt8Inst() const override { return true; } + virtual sophon::SophonTargetTransformInfo *getTTI() const override; +}; + +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880CodeGen.cpp b/lib/Backends/Sophon/BM188x/BM1880CodeGen.cpp new file mode 100644 index 0000000000..0b23d653c0 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880CodeGen.cpp @@ -0,0 +1,8 @@ +#include "BM1880CodeGenBMK.h" + +namespace glow { +std::unique_ptr +BM1880CodeGen::createCodeGen(IRFunction *F, AllocationsInfo &allocInfo) { + return llvm::make_unique(F, allocInfo); +} +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880CodeGen.h b/lib/Backends/Sophon/BM188x/BM1880CodeGen.h new file mode 100644 index 0000000000..6f8afc3c18 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880CodeGen.h @@ -0,0 +1,24 @@ +#ifndef BM1880_CODEGEN_H +#define BM1880_CODEGEN_H + +#include "Backends/Sophon/AllocationsInfo.h" +#include "glow/Base/Tensor.h" +#include "glow/Graph/Graph.h" +#include "glow/IR/IR.h" +#include "glow/IR/Instrs.h" +#include "glow/Support/Debug.h" +#include + +namespace glow { + +class BM1880CodeGen { +public: + virtual void performCodeGen() = 0; + virtual std::vector getCmdbuf() = 0; + static std::unique_ptr + createCodeGen(IRFunction *F, AllocationsInfo &allocInfo); +}; + +} // namespace glow + +#endif // BM1880_CODEGEN_H diff --git a/lib/Backends/Sophon/BM188x/BM1880CodeGenBMK.cpp b/lib/Backends/Sophon/BM188x/BM1880CodeGenBMK.cpp new file mode 100644 index 0000000000..f97942677d --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880CodeGenBMK.cpp @@ -0,0 +1,905 @@ +#include "BM1880CodeGenBMK.h" +#include "glow/Support/Debug.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include + 
+#define DEBUG_TYPE "BM1880_codegenBMK" + +namespace glow { + +BM1880CodeGenBMK::BM1880CodeGenBMK(IRFunction *F, AllocationsInfo &allocInfo) + : F_(F), allocInfo_(allocInfo) { + cmdbuf_size_ = 0x10000000; +} + +void BM1880CodeGenBMK::bmk_init() { + bmk_info_.chip_version = 1880; + bmk_info_.cmdbuf_size = cmdbuf_size_; + bmk_info_.cmdbuf = (u8 *)malloc(bmk_info_.cmdbuf_size); + bmk_ctx_ = nullptr; + bmk_ctx_ = bmk1880_register(&bmk_info_); +} + +void BM1880CodeGenBMK::bmk_deinit() { + if (bmk_ctx_) { + // hack api, update sync id + u32 size; + const u8 *cmdbuf = bmk1880_acquire_cmdbuf(bmk_ctx_, &size); + cmdbuf_.resize(size); + memcpy(&cmdbuf_[0], cmdbuf, size); + bmk1880_cleanup(bmk_ctx_); + } + if (bmk_info_.cmdbuf) { + free(bmk_info_.cmdbuf); + bmk_info_.cmdbuf = nullptr; + } +} + +uint64_t BM1880CodeGenBMK::emitValueAddress(const glow::Value *val) { + assert(allocInfo_.getAllocatedAddress().count(val)); + return allocInfo_.getAllocatedAddress()[val]; +} + +void BM1880CodeGenBMK::visit(const SophonMIGDMAGlobalToLocalInst *inst) { + auto *global = inst->getGlobal(); + auto *local = inst->getLocal(); + auto addr_local = emitValueAddress(local); + auto addr_global = emitValueAddress(global); + + auto shapeNCHW = inst->getShapeNCHW(); + auto globalStrideNCH = inst->getGlobalStrideNCH(); + int n = shapeNCHW[0]; + int c = shapeNCHW[1]; + int h = shapeNCHW[2]; + int w = shapeNCHW[3]; + int stride_n = globalStrideNCH[0]; + int stride_c = globalStrideNCH[1]; + int stride_h = globalStrideNCH[2]; + + // TODO(arcbb): support LocalStride for user would be easier + bool is_local_aligned = inst->getIsLocalAligned(); + bool in_weight_space = inst->getIsGlobalWeightSpace(); + + // bmkernel code + u64 gaddr = addr_global; + laddr_t lmem_addr = addr_local; + tensor_lmem *lmem; + ctrl_t ctrls; + + if (is_local_aligned) { + lmem = bmk1880_tl_prealloc_align(bmk_ctx_, lmem_addr, shape_t4(n, c, h, w), + FMT_I8); + } else { + lmem = + bmk1880_tl_prealloc(bmk_ctx_, lmem_addr, shape_t4(n, 
c, h, w), FMT_I8); + } + + if (in_weight_space) + ctrls = CTRL_WEIGHT; + else + ctrls = CTRL_NEURON; + + stride_t stride = stride_st4(stride_n, stride_c, stride_h, 1); + bmk1880_gdma_load_stride(bmk_ctx_, lmem, gaddr, stride, ctrls); + bmk1880_tl_free(bmk_ctx_, lmem); + + DEBUG_GLOW( + llvm::dbgs() << llvm::format( + "%d\tgdma_load_stride: local=0x%x, global=0x%x, shape=(%d,%d,%d,%d), " + "stride=(%d,%d,%d,%d), aligned=%d, WEIGHT=%d\n", + index++, addr_local, addr_global, n, c, h, w, stride_n, stride_c, + stride_h, 1, is_local_aligned, in_weight_space)); +} + +void BM1880CodeGenBMK::visit(const SophonMIGDMALocalToGlobalInst *inst) { + auto *global = inst->getGlobal(); + auto *local = inst->getLocal(); + auto addr_local = emitValueAddress(local); + auto addr_global = emitValueAddress(global); + + std::vector shapeNCHW = inst->getShapeNCHW(); + auto globalStrideNCH = inst->getGlobalStrideNCH(); + while (shapeNCHW.size() < 4) + shapeNCHW.insert(shapeNCHW.begin(), 1); + assert(shapeNCHW.size() == 4); + int n = shapeNCHW[0]; + int c = shapeNCHW[1]; + int h = shapeNCHW[2]; + int w = shapeNCHW[3]; + assert(globalStrideNCH.size() == 3); + int stride_n = globalStrideNCH[0]; + int stride_c = globalStrideNCH[1]; + int stride_h = globalStrideNCH[2]; + + // TODO(arcbb): support LocalStride for user would be easier + bool is_local_aligned = inst->getIsLocalAligned(); + bool in_weight_space = inst->getIsGlobalWeightSpace(); + + // bmkernel code + u64 gaddr = addr_global; + laddr_t lmem_addr = addr_local; + tensor_lmem *lmem; + ctrl_t ctrls; + + if (is_local_aligned) { + lmem = bmk1880_tl_prealloc_align(bmk_ctx_, lmem_addr, shape_t4(n, c, h, w), + FMT_I8); + } else { + lmem = + bmk1880_tl_prealloc(bmk_ctx_, lmem_addr, shape_t4(n, c, h, w), FMT_I8); + } + + if (in_weight_space) + ctrls = CTRL_WEIGHT; + else + ctrls = CTRL_NEURON; + + stride_t stride = stride_st4(stride_n, stride_c, stride_h, 1); + bmk1880_gdma_store_stride(bmk_ctx_, lmem, gaddr, stride, ctrls); + 
bmk1880_tl_free(bmk_ctx_, lmem); + DEBUG_GLOW(llvm::dbgs() << llvm::format( + "%d\tgdma_store_stride: global=0x%x, local=0x%x, " + "shape=(%d,%d,%d,%d), " + "stride=(%d,%d,%d,%d), aligned=%d, WEIGHT=%d\n", + index++, addr_global, addr_local, n, c, h, w, stride_n, + stride_c, stride_h, 1, is_local_aligned, in_weight_space)); +} + +void BM1880CodeGenBMK::visit(const SophonMIMacConstQ8Inst *inst) { + auto value_input = inst->getSrc(); + auto value_output_high = inst->getDestHigh(); + auto value_output_low = inst->getDestLow(); + + auto addr_ifmap = emitValueAddress(value_input); + auto addr_ofmap_high = emitValueAddress(value_output_high); + auto addr_ofmap_low = emitValueAddress(value_output_low); + + auto in_dim = value_input->getType()->dims(); + auto out_dim = value_output_high->getType()->dims(); + + int input_n; + int ic; + int ih; + int iw; + int oc; + int oh; + int ow; + + input_n = in_dim[0]; + ic = in_dim[1]; + ih = in_dim[2]; + iw = in_dim[3]; + + oc = out_dim[1]; + oh = out_dim[2]; + ow = out_dim[3]; + + int right_shift_width = inst->getRShiftWidth(); + int left_shift_width = inst->getLShiftWidth(); + bool res_is_int8 = inst->getIsResultI8(); + + int multiplier = inst->getMultiplier(); + bool is_multiplier_signed = inst->getIsMultiplierSigned(); + + // bmkernel begins + tensor_lmem *output_high; + tensor_lmem *output_low; + tensor_lmem *input; + + output_high = bmk1880_tl_prealloc_align( + bmk_ctx_, addr_ofmap_high, shape_t4(input_n, oc, oh, ow), FMT_I8); + + output_low = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ofmap_low, + shape_t4(input_n, oc, oh, ow), FMT_I8); + + input = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ifmap, + shape_t4(input_n, ic, ih, iw), FMT_I8); + + bmk1880_mac_const_param_t param; + param.res_high = output_high; + param.res_low = output_low; + param.res_is_int8 = res_is_int8; + param.a = input; + param.b = multiplier; + param.b_is_signed = is_multiplier_signed; + param.lshift_width = left_shift_width; + param.rshift_width = 
right_shift_width; + bmk1880_tpu_mac_const(bmk_ctx_, &param); + + bmk1880_tl_free(bmk_ctx_, input); + bmk1880_tl_free(bmk_ctx_, output_low); + bmk1880_tl_free(bmk_ctx_, output_high); + DEBUG_GLOW(llvm::dbgs() << llvm::format( + "%d\tmac_imm: output_high=0x%x, output_low=0x%x, input=0x%x, " + "imm=%d, output_shape=(%d,%d,%d,%d), " + "input_shape=(%d,%d,%d,%d), is_imm_signed=%d, " + "res_is_int8=%d, lshift=%d, rshift=%d\n", + index++, addr_ofmap_high, addr_ofmap_low, addr_ifmap, + multiplier, input_n, oc, oh, ow, input_n, ic, ih, iw, + is_multiplier_signed, res_is_int8, left_shift_width, + right_shift_width)); +} + +void BM1880CodeGenBMK::visit(const SophonMIMulConstQ8Inst *inst) { + auto *src = inst->getSrc(); + auto value_output_low = inst->getDest(); + + auto addr_ifmap = emitValueAddress(src); + auto addr_ofmap_low = emitValueAddress(value_output_low); + + auto tensor_dim = src->getType()->dims(); + int n = tensor_dim[0]; + int c = tensor_dim[1]; + int h = tensor_dim[2]; + int w = tensor_dim[3]; + + u8 b = inst->getMultiplier(); + bool b_is_signed = inst->getIsMultiplierSigned(); + int rshift_width = inst->getRShiftWidth(); + + // bmkernel begins + tensor_lmem *input; + tensor_lmem *output_low; + + input = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ifmap, shape_t4(n, c, h, w), + FMT_I8); + + output_low = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ofmap_low, + shape_t4(n, c, h, w), FMT_I8); + + bmk1880_mul_const_param_t param; + param.res_high = NULL; + param.res_low = output_low; + param.a = input; + param.b = b; + param.b_is_signed = b_is_signed; + param.rshift_width = rshift_width; + bmk1880_tpu_mul_const(bmk_ctx_, &param); + bmk1880_tl_free(bmk_ctx_, output_low); + bmk1880_tl_free(bmk_ctx_, input); + DEBUG_GLOW(llvm::dbgs() << llvm::format( + "%d\tmul_imm: output_high=0x%x, output_low=0x%x, input=%x, " + "imm=%d, shape=(%d,%d,%d,%d), is_imm_signed=%d, rshift=%d\n", + index++, NULL, addr_ofmap_low, addr_ifmap, b, n, c, h, w, + b_is_signed, rshift_width)); +} + +void 
BM1880CodeGenBMK::visit(const SophonMIMulConstQ16Inst *inst) { + auto *src = inst->getSrc(); + auto value_output_high = inst->getDestHigh(); + auto value_output_low = inst->getDestLow(); + + auto addr_ifmap = emitValueAddress(src); + auto addr_ofmap_high = emitValueAddress(value_output_high); + auto addr_ofmap_low = emitValueAddress(value_output_low); + + auto tensor_dim = src->getType()->dims(); + int n = tensor_dim[0]; + int c = tensor_dim[1]; + int h = tensor_dim[2]; + int w = tensor_dim[3]; + + u8 b = inst->getMultiplier(); + bool b_is_signed = inst->getIsMultiplierSigned(); + int rshift_width = inst->getRShiftWidth(); + + // bmkernel begins + tensor_lmem *input; + tensor_lmem *output_high; + tensor_lmem *output_low; + + input = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ifmap, shape_t4(n, c, h, w), + FMT_I8); + + output_high = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ofmap_high, + shape_t4(n, c, h, w), FMT_I8); + + output_low = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ofmap_low, + shape_t4(n, c, h, w), FMT_I8); + + bmk1880_mul_const_param_t param; + param.res_high = output_high; + param.res_low = output_low; + param.a = input; + param.b = b; + param.b_is_signed = b_is_signed; + param.rshift_width = rshift_width; + bmk1880_tpu_mul_const(bmk_ctx_, &param); + bmk1880_tl_free(bmk_ctx_, output_low); + bmk1880_tl_free(bmk_ctx_, output_high); + bmk1880_tl_free(bmk_ctx_, input); + DEBUG_GLOW(llvm::dbgs() << llvm::format( + "%d\tmul_imm: output_high=%x, output_low=%x, input=%x, " + "imm=%d, shape=(%d,%d,%d,%d), is_imm_signed=%d, rshift=%d\n", + index++, addr_ofmap_high, addr_ofmap_low, addr_ifmap, b, n, c, + h, w, b_is_signed, rshift_width)); +} + +void BM1880CodeGenBMK::visit(const SophonMIReluQ8Inst *inst) { + auto *dst = inst->getDest(); + auto *src = inst->getSrc(); + auto addr_ifmap = emitValueAddress(src); + auto addr_ofmap = emitValueAddress(dst); + + auto tensor_dim = src->getType()->dims(); + + // should assert input dim == output dim + + int n, c, h, w; + if 
(tensor_dim.size() == 4) { + n = tensor_dim[0]; + c = tensor_dim[1]; + h = tensor_dim[2]; + w = tensor_dim[3]; + } else { + assert(tensor_dim.size() == 2); + // FIXME, use getMemoryShape + int M = tensor_dim[0]; + int N = tensor_dim[1]; + auto idiv_round = [](int Num, int Denominator) { + return (Num + Denominator - 1) / Denominator; + }; + + if (N > 32) { + w = 32; + } else { + w = 16; + } + n = M; + h = 1; + c = idiv_round(N, w); + } + + // bmkernel code + laddr_t in_addr = addr_ifmap; + laddr_t out_addr = addr_ofmap; + shape_t dim = shape_t4(n, c, h, w); + + tensor_lmem *input = + bmk1880_tl_prealloc_align(bmk_ctx_, in_addr, dim, FMT_I8); + tensor_lmem *output = + bmk1880_tl_prealloc_align(bmk_ctx_, out_addr, dim, FMT_I8); + bmk1880_relu_param_t relu_param; + relu_param.ofmap = output; + relu_param.ifmap = input; + bmk1880_tpu_relu(bmk_ctx_, &relu_param); + bmk1880_tl_free(bmk_ctx_, output); + bmk1880_tl_free(bmk_ctx_, input); + DEBUG_GLOW(llvm::dbgs() << llvm::format( + "%d\trelu: ouput=%x, input=%x, shape=(%d,%d,%d,%d)\n", index++, + addr_ofmap, addr_ifmap, n, c, h, w)); +} + +void BM1880CodeGenBMK::visit(const SophonMIAvgPoolingQ8Inst *inst) { + auto *dst = inst->getDest(); + auto *src = inst->getSrc(); + auto addr_ifmap = emitValueAddress(src); + auto addr_ofmap = emitValueAddress(dst); + + auto in_dim = src->getType()->dims(); + auto out_dim = dst->getType()->dims(); + + int input_n; + int ic; + int ih; + int iw; + int oc; + int oh; + int ow; + int kh; + int kw; + u8 stride_h; + u8 stride_w; + u8 pad_top; + u8 pad_left; + u8 pad_bottom; + u8 pad_right; + int rshift_width; + + input_n = in_dim[0]; + + ic = in_dim[1]; + ih = in_dim[2]; + iw = in_dim[3]; + + oc = out_dim[1]; + oh = out_dim[2]; + ow = out_dim[3]; + + kh = inst->getKernelHW()[0]; + kw = inst->getKernelHW()[1]; + + stride_h = inst->getStrideHW()[0]; + stride_w = inst->getStrideHW()[1]; + + pad_top = inst->getPadTLBR()[0]; + pad_left = inst->getPadTLBR()[1]; + pad_bottom = inst->getPadTLBR()[2]; + 
pad_right = inst->getPadTLBR()[3]; + + rshift_width = inst->getRShiftWidth(); + + // bmkernel code begins + tensor_lmem *output; + tensor_lmem *input; + + output = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ofmap, + shape_t4(input_n, oc, oh, ow), FMT_I8); + + input = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ifmap, + shape_t4(input_n, ic, ih, iw), FMT_I8); + + bmk1880_avg_pooling_param_t param; + param.ofmap = output; + param.ifmap = input; + param.kh = kh; + param.kw = kw; + param.ins_h = 0; + param.ins_last_h = 0; + param.ins_w = 0; + param.ins_last_w = 0; + param.pad_top = pad_top; + param.pad_left = pad_left; + param.pad_bottom = pad_bottom; + param.pad_right = pad_right; + param.stride_h = stride_h; + param.stride_w = stride_w; + param.avg_pooling_const = 0; + param.rshift_width = rshift_width; + bmk1880_tpu_avg_pooling(bmk_ctx_, &param); + bmk1880_tl_free(bmk_ctx_, input); + bmk1880_tl_free(bmk_ctx_, output); + DEBUG_GLOW( + llvm::dbgs() << llvm::format( + "%d\tavg_pooling: ouput=0x%x, input=0x%x, out_shape=(%d,%d,%d,%d), " + "input_shape=(%d,%d,%d,%d), kernel=(%d,%d), padTLBR=(%d,%d,%d,%d), " + "strideHW=(%d,%d), avg_pooling_const=%d, rshift=%d\n", + index++, addr_ofmap, addr_ifmap, input_n, oc, oh, ow, input_n, ic, ih, + iw, kh, kw, pad_top, pad_left, pad_bottom, pad_right, stride_h, + stride_w, 0, rshift_width)); +} + +void BM1880CodeGenBMK::visit(const SophonMIMaxPoolingQ8Inst *inst) { + auto *dst = inst->getDest(); + auto *src = inst->getSrc(); + auto addr_ifmap = emitValueAddress(src); + auto addr_ofmap = emitValueAddress(dst); + + auto in_dim = src->getType()->dims(); + auto out_dim = dst->getType()->dims(); + + int input_n; + int ic; + int ih; + int iw; + int oc; + int oh; + int ow; + int kh; + int kw; + u8 stride_h; + u8 stride_w; + u8 pad_top; + u8 pad_left; + u8 pad_bottom; + u8 pad_right; + + input_n = in_dim[0]; + + ic = in_dim[1]; + ih = in_dim[2]; + iw = in_dim[3]; + + oc = out_dim[1]; + oh = out_dim[2]; + ow = out_dim[3]; + + kh = 
inst->getKernelHW()[0]; + kw = inst->getKernelHW()[1]; + + stride_h = inst->getStrideHW()[0]; + stride_w = inst->getStrideHW()[1]; + + pad_top = inst->getPadTLBR()[0]; + pad_left = inst->getPadTLBR()[1]; + pad_bottom = inst->getPadTLBR()[2]; + pad_right = inst->getPadTLBR()[3]; + // bmkernel begins + tensor_lmem *output; + tensor_lmem *input; + + output = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ofmap, + shape_t4(input_n, oc, oh, ow), FMT_I8); + + input = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ifmap, + shape_t4(input_n, ic, ih, iw), FMT_I8); + + bmk1880_max_pooling_param_t param; + param.ofmap = output; + param.ifmap = input; + param.kh = kh; + param.kw = kw; + param.pad_top = pad_top; + param.pad_bottom = pad_bottom; + param.pad_left = pad_left; + param.pad_right = pad_right; + param.stride_h = stride_h; + param.stride_w = stride_w; + + bmk1880_tpu_max_pooling(bmk_ctx_, &param); + bmk1880_tl_free(bmk_ctx_, input); + bmk1880_tl_free(bmk_ctx_, output); + DEBUG_GLOW( + llvm::dbgs() << llvm::format( + "%d\tmax_pooling: ouput=0x%x, input=0x%x, out_shape=(%d,%d,%d,%d), " + "input_shape=(%d,%d,%d,%d), kernel=(%d,%d), padTLBR=(%d,%d,%d,%d), " + "strideHW=(%d,%d)\n", + index++, addr_ofmap, addr_ifmap, input_n, oc, oh, ow, input_n, ic, ih, + iw, kh, kw, pad_top, pad_left, pad_bottom, pad_right, stride_h, + stride_w)); +} + +void BM1880CodeGenBMK::visit(const SophonMIConvolutionQ8Inst *inst) { + auto value_input = inst->getSrc(); + auto value_output = inst->getDest(); + auto value_bias = inst->getBias(); + auto value_filter = inst->getFilter(); + + auto addr_ifmap = emitValueAddress(value_input); + auto addr_ofmap = emitValueAddress(value_output); + auto addr_bias = emitValueAddress(value_bias); + auto addr_filter = emitValueAddress(value_filter); + + auto in_dim = value_input->getType()->dims(); + auto out_dim = value_output->getType()->dims(); + auto kern_dim = value_filter->getType()->dims(); + + int input_n; + int ic; + int ih; + int iw; + int oc; + int oh; + int ow; + int 
kh; + int kw; + u8 stride_h; + u8 stride_w; + u8 pad_top; + u8 pad_left; + u8 pad_bottom; + u8 pad_right; + u8 ins_h; + u8 ins_w; + u8 ins_last_h; + u8 ins_last_w; + u8 dilation_h; + u8 dilation_w; + bool relu_enable; + int rshift_width; + + input_n = in_dim[0]; + ic = in_dim[1]; + ih = in_dim[2]; + iw = in_dim[3]; + + oc = out_dim[1]; + oh = out_dim[2]; + ow = out_dim[3]; + + kh = kern_dim[2]; + kw = kern_dim[3]; + + stride_h = inst->getStrideHW()[0]; + stride_w = inst->getStrideHW()[1]; + + pad_top = inst->getPadTLBR()[0]; + pad_left = inst->getPadTLBR()[1]; + pad_bottom = inst->getPadTLBR()[2]; + pad_right = inst->getPadTLBR()[3]; + + ins_h = 0; + ins_w = 0; + ins_last_h = 0; + ins_last_w = 0; + + dilation_h = inst->getDilationHW()[0]; + dilation_w = inst->getDilationHW()[1]; + + relu_enable = inst->getEnableRelu(); + rshift_width = inst->getRShiftWidth(); + + // below bmkernel code begins + + tensor_lmem *output; + tensor_lmem *input; + tensor_lmem *weight; + tensor_lmem *bias; + + output = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ofmap, + shape_t4(input_n, oc, oh, ow), FMT_I8); + + input = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ifmap, + shape_t4(input_n, ic, ih, iw), FMT_I8); + + weight = bmk1880_tl_prealloc(bmk_ctx_, addr_filter, shape_t4(ic, oc, kh, kw), + FMT_I8); + + bias = + bmk1880_tl_prealloc(bmk_ctx_, addr_bias, shape_t4(2, oc, 1, 1), FMT_I8); + + bmk1880_conv_param_t param; + param.ofmap = output; + param.ifmap = input; + param.weight = weight; + param.bias = bias; + + param.ins_h = ins_h; + param.ins_last_h = ins_last_h; + param.ins_w = ins_w; + param.ins_last_w = ins_last_w; + + param.pad_top = pad_top; + param.pad_bottom = pad_bottom; + param.pad_left = pad_left; + param.pad_right = pad_right; + param.stride_h = stride_h; + param.stride_w = stride_w; + param.dilation_h = dilation_h; + param.dilation_w = dilation_w; + param.relu_enable = relu_enable; + param.rshift_width = rshift_width; + bmk1880_tpu_conv(bmk_ctx_, &param); + + 
bmk1880_tl_free(bmk_ctx_, bias); + bmk1880_tl_free(bmk_ctx_, weight); + bmk1880_tl_free(bmk_ctx_, input); + bmk1880_tl_free(bmk_ctx_, output); + DEBUG_GLOW( + llvm::dbgs() << llvm::format( + "%d\tconvolution: ouput=0x%x, input=0x%x, filiter=0x%x, bias=0x%x, " + "out_shape=(%d,%d,%d,%d), input_shape=(%d,%d,%d,%d), " + "kernel_shape=(%d,%d,%d,%d), padTLBR=(%d,%d,%d,%d), " + "strideHW=(%d,%d), dilationHW(%d,%d), enable_relu=%d, rshift=%d\n", + index++, addr_ofmap, addr_ifmap, addr_filter, addr_bias, input_n, oc, + oh, ow, input_n, ic, ih, iw, ic, oc, kh, kw, pad_top, pad_left, + pad_bottom, pad_right, stride_h, stride_w, dilation_h, dilation_w, + relu_enable, rshift_width)); +} + +void BM1880CodeGenBMK::visit(const SophonMIDepthwiseConvolutionQ8Inst *inst) { + auto value_input = inst->getSrc(); + auto value_output = inst->getDest(); + auto value_bias = inst->getBias(); + auto value_filter = inst->getFilter(); + + auto addr_ifmap = emitValueAddress(value_input); + auto addr_ofmap = emitValueAddress(value_output); + auto addr_bias = emitValueAddress(value_bias); + auto addr_filter = emitValueAddress(value_filter); + + auto in_dim = value_input->getType()->dims(); + auto out_dim = value_output->getType()->dims(); + auto kern_dim = value_filter->getType()->dims(); + + int input_n; + int ic; + int ih; + int iw; + int oc; + int oh; + int ow; + int kh; + int kw; + u8 stride_h; + u8 stride_w; + u8 pad_top; + u8 pad_left; + u8 pad_bottom; + u8 pad_right; + u8 ins_h; + u8 ins_w; + u8 ins_last_h; + u8 ins_last_w; + int rshift_width; + + input_n = in_dim[0]; + ic = in_dim[1]; + ih = in_dim[2]; + iw = in_dim[3]; + + oc = out_dim[1]; + oh = out_dim[2]; + ow = out_dim[3]; + + kh = kern_dim[2]; + kw = kern_dim[3]; + + stride_h = inst->getStrideHW()[0]; + stride_w = inst->getStrideHW()[1]; + + pad_top = inst->getPadTLBR()[0]; + pad_left = inst->getPadTLBR()[1]; + pad_bottom = inst->getPadTLBR()[2]; + pad_right = inst->getPadTLBR()[3]; + + ins_h = 0; + ins_w = 0; + ins_last_h = 0; + 
ins_last_w = 0; + + rshift_width = inst->getRShiftWidth(); + + // below bmkernel code begins + tensor_lmem *output; + tensor_lmem *input; + tensor_lmem *weight; + tensor_lmem *bias = nullptr; + + output = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ofmap, + shape_t4(input_n, oc, oh, ow), FMT_I8); + + input = bmk1880_tl_prealloc_align(bmk_ctx_, addr_ifmap, + shape_t4(input_n, ic, ih, iw), FMT_I8); + + // Depthwise + weight = bmk1880_tl_prealloc_align(bmk_ctx_, addr_filter, + shape_t4(1, oc, kh, kw), FMT_I8); + + bias = + bmk1880_tl_prealloc(bmk_ctx_, addr_bias, shape_t4(2, oc, 1, 1), FMT_I8); + + bmk1880_depthwise_param_t param; + param.ofmap = output; + param.ifmap = input; + param.weight = weight; + param.bias = bias; + + param.ins_h = ins_h; + param.ins_last_h = ins_last_h; + param.ins_w = ins_w; + param.ins_last_w = ins_last_w; + + param.pad_top = pad_top; + param.pad_bottom = pad_bottom; + param.pad_left = pad_left; + param.pad_right = pad_right; + + param.stride_h = stride_h; + param.stride_w = stride_w; + + param.rshift_width = rshift_width; + bmk1880_tpu_depthwise(bmk_ctx_, &param); + + bmk1880_tl_free(bmk_ctx_, bias); + bmk1880_tl_free(bmk_ctx_, weight); + bmk1880_tl_free(bmk_ctx_, input); + bmk1880_tl_free(bmk_ctx_, output); + DEBUG_GLOW(llvm::dbgs() << llvm::format( + "%d\tdepthwise: ouput=%d, input=%d, filiter=%d, bias=%d\n", + index++, addr_ofmap, addr_ifmap, addr_filter, addr_bias)); + DEBUG_GLOW( + llvm::dbgs() << llvm::format( + "%d\tdepthwise: ouput=0x%x, input=0x%x, filiter=0x%x, bias=0x%x, " + "out_shape=(%d,%d,%d,%d), input_shape=(%d,%d,%d,%d), " + "kernel_shape=(%d,%d,%d,%d), padTLBR=(%d,%d,%d,%d), " + "strideHW=(%d,%d), ins_h=(%d,%d), ins_w=(%d,%d), rshift=%d\n", + index++, addr_ofmap, addr_ifmap, addr_filter, addr_bias, input_n, oc, + oh, ow, input_n, ic, ih, iw, 1, oc, kh, kw, pad_top, pad_left, + pad_bottom, pad_right, stride_h, stride_w, ins_h, ins_last_h, ins_w, + ins_last_w, rshift_width)); +} + +template +void 
BM1880CodeGenBMK::bmk_matrix_mac(const T *inst, bool res_is_int8) { + auto value_input = inst->getSrc(); + auto value_output = inst->getDest(); + auto value_bias = inst->getBias(); + auto value_filter = inst->getFilter(); + + auto addr_ifmap = emitValueAddress(value_input); + auto addr_ofmap = emitValueAddress(value_output); + auto addr_bias = emitValueAddress(value_bias); + auto addr_filter = emitValueAddress(value_filter); + + int right_shift_width = inst->getRShiftWidth(); + int left_shift_width = inst->getLShiftWidth(); + bool res_add = inst->getResultAdd(); + + int M; + int K; + int N; + + auto in_dim = value_input->getType()->dims(); + auto kern_dim = value_filter->getType()->dims(); + + M = in_dim[0]; + K = in_dim[1]; + N = kern_dim[1]; + + // assert in, kern, and out dim + + // bmkernel code + tensor_lmem *output; + tensor_lmem *input; + tensor_lmem *weight; + tensor_lmem *bias; + + if (res_is_int8) { + output = + bmk1880_tl_prealloc_align(bmk_ctx_, addr_ofmap, shape_t2(M, N), FMT_I8); + } else { + output = bmk1880_tl_prealloc_align( + bmk_ctx_, addr_ofmap, shape_t2(2 * M, N), // reserve for 16-bit output + FMT_I8); + } + + input = + bmk1880_tl_prealloc_align(bmk_ctx_, addr_ifmap, shape_t2(M, K), FMT_I8); + + weight = + bmk1880_tl_prealloc_align(bmk_ctx_, addr_filter, shape_t2(K, N), FMT_I8); + + bias = bmk1880_tl_prealloc_align(bmk_ctx_, addr_bias, shape_t2(2, N), FMT_I8); + + bmk1880_matrix_mac_param_t p; + p.res = output; // 16-bit output space + p.left = input; + p.right = weight; + p.bias = bias; + + p.lshift_width = left_shift_width; + p.rshift_width = right_shift_width; + + p.res_is_int8 = res_is_int8; + p.ctrls = CTRL_NULL; + if (res_add) { + p.ctrls = CTRL_RA; + } + + bmk1880_tpu_matrix_mac(bmk_ctx_, &p); + bmk1880_tl_free(bmk_ctx_, bias); + bmk1880_tl_free(bmk_ctx_, weight); + bmk1880_tl_free(bmk_ctx_, input); + bmk1880_tl_free(bmk_ctx_, output); + + DEBUG_GLOW(llvm::dbgs() << llvm::format( + "%d\tmatrix_mac: ouput=0x%x, input=0x%x, filiter=0x%x, 
" + "bias=0x%x, output_shape=(%d,%d), input_shape=(%d,%d), " + "weight_shape=(%d,%d), res_is_int8=%d, res_add=%d, lshift=%d, " + "rshift=%d\n", + index++, addr_ofmap, addr_ifmap, addr_filter, addr_bias, M, N, + M, K, K, N, res_is_int8, res_add, left_shift_width, + right_shift_width)); +} + +void BM1880CodeGenBMK::visit(const SophonMIFCQ16Inst *inst) { + bool res_is_int8 = false; + bmk_matrix_mac(inst, res_is_int8); +} + +void BM1880CodeGenBMK::visit(const SophonMIFCQ8Inst *inst) { + bool res_is_int8 = true; + bmk_matrix_mac(inst, res_is_int8); +} + +void BM1880CodeGenBMK::performCodeGen() { + DEBUG_GLOW(F_->dump()); + auto &instrs = F_->getInstrs(); + bmk_init(); + int index = 0; + for (auto &I : instrs) { + accept_helper(&I); + } + bmk_deinit(); +} + +std::vector BM1880CodeGenBMK::getCmdbuf() { return cmdbuf_; } + +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880CodeGenBMK.h b/lib/Backends/Sophon/BM188x/BM1880CodeGenBMK.h new file mode 100644 index 0000000000..8cec0469bd --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880CodeGenBMK.h @@ -0,0 +1,52 @@ +#ifndef BM1880_CODEGEN_BMK_H +#define BM1880_CODEGEN_BMK_H + +#include "BM1880CodeGen.h" +#include "Backends/Sophon/GlowLIRVisitor.h" +#include + +namespace glow { + +class BM1880CodeGenBMK : public BM1880CodeGen, public GlowLIRVisitor { +public: + BM1880CodeGenBMK(IRFunction *F, AllocationsInfo &allocInfo); + void visit(const SophonMIMulConstQ8Inst *inst) override; + void visit(const SophonMIMulConstQ16Inst *inst) override; + void visit(const SophonMIMacConstQ8Inst *inst) override; + void visit(const SophonMIReluQ8Inst *inst) override; + void visit(const SophonMIAvgPoolingQ8Inst *inst) override; + void visit(const SophonMIMaxPoolingQ8Inst *inst) override; + void visit(const SophonMIConvolutionQ8Inst *inst) override; + void visit(const SophonMIDepthwiseConvolutionQ8Inst *inst) override; + void visit(const SophonMIFCQ16Inst *inst) override; + void visit(const SophonMIFCQ8Inst *inst) override; + 
void visit(const SophonMIGDMAGlobalToLocalInst *inst) override; + void visit(const SophonMIGDMALocalToGlobalInst *inst) override; + + void visit(const AllocActivationInst *inst) { index++; } + void visit(const DeallocActivationInst *inst) { index++; } + void default_method(glow::Instruction *Inst) { + llvm_unreachable("Unknown value kind"); + } + void performCodeGen() override; + std::vector getCmdbuf() override; + +private: + template void bmk_matrix_mac(const T *inst, bool res_is_int8); + void bmk_init(); + void bmk_deinit(); + uint64_t emitValueAddress(const glow::Value *val); + +private: + bmk1880_context_t *bmk_ctx_; + bmk_info_t bmk_info_; + AllocationsInfo &allocInfo_; + const IRFunction *F_; + std::vector cmdbuf_; + int cmdbuf_size_; + int index{0}; +}; + +} // namespace glow + +#endif // BM1880_CODEGEN_BMK_H diff --git a/lib/Backends/Sophon/BM188x/BM1880DumpAllPass.cpp b/lib/Backends/Sophon/BM188x/BM1880DumpAllPass.cpp new file mode 100644 index 0000000000..2ad55df299 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880DumpAllPass.cpp @@ -0,0 +1,77 @@ +#include "BM1880DumpAllPass.h" +#include "Backends/Sophon/AllocationsInfo.h" +#include "Backends/Sophon/GlowLIRVisitor.h" +#include "glow/IR/IRBuilder.h" +#include "glow/Support/Debug.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +using namespace glow; + +#define DEBUG_TYPE "GenLS" + +class DumpAllPass : public GlowLIRVisitor { +public: + void runOnIRFunction(IRFunction *F); + +private: + void visit(AllocActivationInst *Inst) override {} + void visit(DeallocActivationInst *Inst) override {} + void visit(SophonStoreInst *Inst) override {} + void visit(SophonLoadInst *Inst) override {} + + void default_method(glow::Instruction *Inst) override { + std::cout << "debug: " << Inst->getName().str() << std::endl; + IRBuilder builder(F_); + for (unsigned i = 0; i < Inst->getNumOperands(); i++) { + const auto &op = 
Inst->getOperand(i); + if (op.second != OperandKind::Out) + continue; + auto *dest = op.first; + if (not llvm::isa(dest)) + continue; + // add weightVar + auto *W = builder.createWeightVar(dest->getType(), + dest->getName().str() + ".spill", + WeightVar::MutabilityKind::Mutable); + // add placeholder + auto *PH = F_->getGraph()->getParent()->createPlaceholder( + dest->getType(), + std::string("save_") + Inst->getName().str() + "_dump", false); + // update variable map + F_->getVariableMap()[PH] = W; + // add load inst + auto *S = builder.createSophonStoreInst(Inst->getName().str() + "_dump", + W, dest); + F_->moveInstruction(Inst, S); + F_->moveInstruction(S, Inst); + } + } + + IRFunction *F_; +}; + +void DumpAllPass::runOnIRFunction(IRFunction *F) { + F_ = F; + auto &instrs = F->getInstrs(); + for (auto it = instrs.begin(), e = instrs.end(); it != e;) { + auto cur = it; + auto &I = *it; + it++; + accept_helper(&I); + } +} + +namespace glow { +namespace sophon { +void runDumpAllPass(IRFunction *F) { + auto p = llvm::make_unique(); + p->runOnIRFunction(F); +} + +} // namespace sophon +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880DumpAllPass.h b/lib/Backends/Sophon/BM188x/BM1880DumpAllPass.h new file mode 100644 index 0000000000..8a78811fba --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880DumpAllPass.h @@ -0,0 +1,9 @@ +#pragma once + +#include "glow/IR/IR.h" + +namespace glow { +namespace sophon { +void runDumpAllPass(IRFunction *F); +} +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880ExpandSophonInst.cpp b/lib/Backends/Sophon/BM188x/BM1880ExpandSophonInst.cpp new file mode 100644 index 0000000000..e40c3685dd --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880ExpandSophonInst.cpp @@ -0,0 +1,363 @@ +#include "BM1880ExpandSophonInst.h" +#include "Backends/Sophon/GlowLIRVisitor.h" +#include "Backends/Sophon/Utility/memory.h" +#include "glow/IR/IRBuilder.h" +#include "glow/IR/IRUtils.h" +#include "glow/Support/Debug.h" 
+#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "expandSophonInst" + +using namespace glow; + +class ExpandVisitor : public GlowLIRVisitor { +public: + ExpandVisitor(IRFunction *F, const BM1880AllocationsInfo &allocationsInfo) + : F_(F), allocationsInfo_(allocationsInfo), Builder_(F) {} + ~ExpandVisitor() = default; + void visit(SophonLoadInst *Inst) override; + void visit(SophonStoreInst *Inst) override; + void visit(SophonConvolutionQ8Inst *Inst) override; + void visit(SophonMaxPoolQ8Inst *Inst) override; + void visit(SophonFullyConnectedQ8Inst *Inst) override; + void visit(SophonReluQ8Inst *Inst) override; + void visit(AllocActivationInst *Inst) override {} + void visit(DeallocActivationInst *Inst) override {} + + void default_method(glow::Instruction *Inst) override; + +private: + // load constnat or placeholder + bool isConstant(glow::SophonLoadInst *Inst); + bool isLocalAligned(glow::SophonLoadInst *Inst); + +private: + IRFunction *F_; + const BM1880AllocationsInfo &allocationsInfo_; + IRBuilder Builder_; + std::vector oldInsts_; +}; + +void ExpandVisitor::default_method(glow::Instruction *Inst) { + DEBUG_GLOW(llvm::dbgs() << "unsupport ExpandVisitor " << Inst->getName() + << "\n"); + llvm_unreachable("TODO!"); +} + +// load constant weight or not +bool ExpandVisitor::isConstant(SophonLoadInst *Inst) { + auto *W = llvm::cast(getOrigin(Inst->getSrc())); + if (W->getMutability() == WeightVar::MutabilityKind::Constant) + return true; + return false; +} + +static std::vector getStride(const std::vector &Dims) { + assert(Dims.size() == 4); + return {Dims[1] * Dims[2] * Dims[3], Dims[2] * Dims[3], Dims[3]}; +} + +static void setGlobalStride(const std::vector &GlobalDims, + const std::vector &LocalDims, + std::vector &GlobalStrideNCH) { + auto sg = getStride(GlobalDims); + auto sl = getStride(LocalDims); + GlobalStrideNCH = {std::min(sg[0], sl[0]), std::min(sg[1], sl[1]), + std::min(sg[2], sl[2])}; +} + +static void 
setDefautByDims(const std::vector &Dims, + std::vector &ShapeNCHW, + std::vector &GlobalStrideNCH) { + assert(Dims.size() == 4); + ShapeNCHW = {Dims[0], Dims[1], Dims[2], Dims[3]}; + GlobalStrideNCH = {Dims[1] * Dims[2] * Dims[3], Dims[2] * Dims[3], Dims[3]}; +} + +static void setDefaultChannelWise(unsigned Channel, + std::vector &ShapeNCHW, + std::vector &GlobalStrideNCH) { + ShapeNCHW = {2, Channel, 1, 1}; + GlobalStrideNCH = {Channel, 1, 1}; +} + +static bool initConvLoadInfo(unsigned Idx, const std::vector &Dims, + bool &IsLocalAligned, + std::vector &ShapeNCHW, + std::vector &GlobalStrideNCH) { + bool is_constant; + switch (Idx) { + case 1: + // input + IsLocalAligned = true; + is_constant = false; + setDefautByDims(Dims, ShapeNCHW, GlobalStrideNCH); + break; + case 2: { + // weight + IsLocalAligned = false; + is_constant = true; + // see plat-bm188x/bmkernel/conv_parallel_bmkernel.cpp +238 + // (1, oc, kh*kw, ic) + unsigned oc = Dims[0]; + unsigned ic = Dims[1]; + ShapeNCHW = {1, oc, Dims[2] * Dims[3], ic}; + // see plat-bm188x/bmkernel/conv_parallel_bmkernel.cpp +298 + // (oc*kh*kw*ic, kh*kw*ic, ic) + GlobalStrideNCH = {oc * Dims[2] * Dims[3] * ic, Dims[2] * Dims[3] * ic, ic}; + break; + } + case 3: + // bias + IsLocalAligned = false; + is_constant = true; + setDefaultChannelWise(Dims[0], ShapeNCHW, GlobalStrideNCH); + break; + default: + llvm_unreachable("TODO!"); + } + return is_constant; +} + +// FIXME + +// Implicit W parameter Rule: +// This is based on bmkernel implementation. +// When W is not provided with 2D tensor, we use the algorithm to decide W. 
+template +static bool get_hw_dim(const T &vec, unsigned *n, unsigned *c, unsigned *h, + unsigned *w) { + bool ret = false; + size_t dim = vec.size(); + switch (dim) { + case 4: + *n = vec[0]; + *c = vec[1]; + *h = vec[2]; + *w = vec[3]; + ret = true; + break; + case 3: + assert(false && "Not support Dimension = 3"); + ret = false; + break; + case 2: { + unsigned M = vec[0]; + unsigned N = vec[1]; + if (N > 32) { + *w = 32; + } else { + *w = 16; + } + *n = M; + *h = 1; + *c = sophon::idiv_round(N, *w); + ret = true; + } break; + case 1: + *n = 1; + *c = vec[0]; + *h = 1; + *w = 1; + ret = true; + break; + default: + assert(false && "Dimension is not between 1~4"); + ret = false; + break; + } + return ret; +} + +static bool initFCLoadInfo(unsigned Idx, const std::vector &Dims, + std::vector &ShapeNCHW, + std::vector &GlobalStrideNCH, + bool &is_local_aligned) { + bool is_constant; + if (Idx == 0 or Idx == 1 or Idx == 2) { + // Idx 0/1 is input + is_constant = Idx <= 1 ? false : true; + unsigned n, c, h, w; + get_hw_dim>(Dims, &n, &c, &h, &w); + ShapeNCHW = {n, c, h, w}; + assert(Dims.size() == 2); + setGlobalStride({Dims[0], 1, 1, Dims[1]}, ShapeNCHW, GlobalStrideNCH); + is_local_aligned = true; + } else if (Idx == 3) { + // bias + is_constant = true; + // Implicit W parameter Rule: + // This is based on bmkernel implementation. + // When W is not provided with 2D tensor, we use the algorithm to decide + // W. 
+ unsigned w; + assert(Dims.size() == 1); + unsigned dim = Dims[0]; + if (dim > 32) { + w = 32; + } else { + w = 16; + } + unsigned c = sophon::idiv_round(dim, w); + ShapeNCHW = {2, c, 1, w}; + setGlobalStride({1, 1, 1, dim}, ShapeNCHW, GlobalStrideNCH); + is_local_aligned = false; + } else + llvm_unreachable("TODO!"); + return is_constant; +} + +/* Replaces a SophonLoadInst with a SophonMIGDMAGlobalToLocalInst. The shape, stride, constant and alignment operands are derived from the non-Out users of the load destination. */ void ExpandVisitor::visit(SophonLoadInst *Inst) { + // operands of SophonMIGDMAGlobalToLocalInst + std::vector shape_NCHW; + std::vector global_stride_NCH; + /* Both flags start at the values the generic (default) case assigns, so they are never read indeterminate when the loop inspects no user. */ bool is_local_aligned = true; + bool is_constant = false; + + auto type = Inst->getDest()->getType(); + std::vector dims{type->dims().begin(), type->dims().end()}; + auto users = Inst->getDest()->getUsers(); + // check user InstKind to init above operands + for (auto &user : users) { + if (user.getOperand().second == OperandKind::Out) + continue; + switch (user.get()->getKind()) { + case glow::Kinded::Kind::SophonConvolutionQ8InstKind: + is_constant = initConvLoadInfo(user.idx_, dims, is_local_aligned, + shape_NCHW, global_stride_NCH); + break; + case glow::Kinded::Kind::SophonFullyConnectedQ8InstKind: + is_constant = initFCLoadInfo(user.idx_, dims, shape_NCHW, + global_stride_NCH, is_local_aligned); + break; +#if 0 + case glow::Kinded::Kind::SophonWinograndInstKind: + case glow::Kinded::Kind::SophonDepthwiseInstKind: + is_local_aligned = user.idx_ == 1? false : true; + is_constant = user.idx_ ==1 ?
true : false; + if (user.idx_ == 1) { + setDefautByDims(dims, shape_NCHW, global_stride_NCH); + } else { + setDefaultChannelWise(dims[0], shape_NCHW, global_stride_NCH); + } + case glow::Kinded::Kind::SophonArithmeticInstKind: + is_local_aligned = true; + is_constant = isConstant(Inst); + setDefautByDims(dims, shape_NCHW, global_stride_NCH); +#endif + default: // input/output neuron + setDefautByDims(dims, shape_NCHW, global_stride_NCH); + is_constant = false; + is_local_aligned = true; + DEBUG_GLOW(llvm::dbgs() << "ExpandSophonInst for " + << user.get()->getName() << " Inst\n"); + } + } + + auto newInst = Builder_.createSophonMIGDMAGlobalToLocalInst( + Inst->getName(), Inst->getDest(), Inst->getSrc(), shape_NCHW, + global_stride_NCH, is_constant, is_local_aligned); + + F_->moveInstruction(Inst, newInst); + DEBUG_GLOW(llvm::dbgs() << "visit " << Inst->getName() << "\n"); + F_->eraseInstruction(Inst); +} + +/* Replaces a SophonStoreInst with a SophonMIGDMALocalToGlobalInst. Shape, stride and alignment are derived from the instructions that use the store source as an Out operand. */ void ExpandVisitor::visit(SophonStoreInst *Inst) { + auto type = Inst->getSrc()->getType(); + std::vector dims{type->dims().begin(), type->dims().end()}; + /* Starts at the value the default case assigns, so it is never read indeterminate when no Out user sets it (for example when the only Out user is a dealloc). */ bool is_local_aligned = true; + std::vector shape_NCHW; + std::vector global_stride_NCH; + + auto users = Inst->getSrc()->getUsers(); + for (auto &user : users) { + // store's user is out MI user + if (user.getOperand().second != OperandKind::Out) + continue; + switch (user.get()->getKind()) { + case glow::Kinded::Kind::SophonMIFCQ8InstKind: + initFCLoadInfo(user.idx_, dims, shape_NCHW, global_stride_NCH, + is_local_aligned); + break; + case glow::Kinded::Kind::DeallocActivationInstKind: + // ignore user deallocInst + break; + default: + is_local_aligned = true; + shape_NCHW = {dims.begin(), dims.end()}; + global_stride_NCH = {shape_NCHW[1] * shape_NCHW[2] * shape_NCHW[3], + shape_NCHW[2] * shape_NCHW[3], shape_NCHW[3]}; + break; + } + } + auto *new_inst = Builder_.createSophonMIGDMALocalToGlobalInst( + Inst->getName(), Inst->getDest(), Inst->getSrc(), shape_NCHW, + global_stride_NCH, false /*IsGlobalWeightSpace*/,
is_local_aligned); + F_->moveInstruction(Inst, new_inst); + DEBUG_GLOW(llvm::dbgs() << "visit " << Inst->getName() << "\n"); + F_->eraseInstruction(Inst); +} + +/* Replaces a SophonConvolutionQ8Inst with a SophonMIConvolutionQ8Inst that forwards its operands and attributes; the two trailing integer arguments are passed as 0 and the dependency list is empty. */ void ExpandVisitor::visit(SophonConvolutionQ8Inst *Inst) { + std::vector depends; + auto *newInst = Builder_.createSophonMIConvolutionQ8Inst( + Inst->getName(), Inst->getDest(), Inst->getSrc(), Inst->getFilter(), + Inst->getBias(), Inst->getStrideHW(), Inst->getPadTLBR(), + Inst->getDilationHW(), Inst->getRShiftWidth(), Inst->getEnableRelu(), 0, + 0, depends); + F_->moveInstruction(Inst, newInst); + DEBUG_GLOW(llvm::dbgs() << "visit " << Inst->getName() << "\n"); + F_->eraseInstruction(Inst); +} + +/* Replaces a SophonMaxPoolQ8Inst with a SophonMIMaxPoolingQ8Inst plus a SophonMIMulConstQ8Inst that reads and writes the destination, using the multiplier and right-shift width of the original instruction. */ void ExpandVisitor::visit(SophonMaxPoolQ8Inst *Inst) { + auto *newPool = Builder_.createSophonMIMaxPoolingQ8Inst( + Inst->getName(), Inst->getDest(), Inst->getSrc(), Inst->getKernelHW(), + Inst->getStrideHW(), Inst->getPadTLBR()); + + auto *newMul = Builder_.createSophonMIMulConstQ8Inst( + Inst->getName(), Inst->getDest(), Inst->getDest(), Inst->getMultiplier(), + 0, Inst->getRShiftWidth()); + F_->moveInstruction(Inst, newPool); + F_->moveInstruction(Inst, newMul); + DEBUG_GLOW(llvm::dbgs() << "visit " << Inst->getName() << "\n"); + F_->eraseInstruction(Inst); +} + +/* Replaces a SophonFullyConnectedQ8Inst with a SophonMIFCQ8Inst using a fixed left shift of 3, and adds a SophonMIReluQ8Inst on the destination when getRelu() is set. */ void ExpandVisitor::visit(SophonFullyConnectedQ8Inst *Inst) { + // hard code: lshift default is 3 + const int default_lshift = 3; + auto *new_inst = Builder_.createSophonMIFCQ8Inst( + Inst->getName(), Inst->getDest(), Inst->getSrc(), Inst->getWeights(), + Inst->getBias(), Inst->getRShiftWidth(), default_lshift, + Inst->getResultAdd()); + F_->moveInstruction(Inst, new_inst); + if (Inst->getRelu()) { + auto *relu = Builder_.createSophonMIReluQ8Inst( + Inst->getName(), Inst->getDest(), Inst->getDest()); + F_->moveInstruction(Inst, relu); + } + DEBUG_GLOW(llvm::dbgs() << "visit " << Inst->getName() << "\n"); + F_->eraseInstruction(Inst); +} + +void ExpandVisitor::visit(SophonReluQ8Inst *Inst) { + auto *new_inst = Builder_.createSophonMIReluQ8Inst( +
Inst->getName(), Inst->getDest(), Inst->getSrc()); + F_->moveInstruction(Inst, new_inst); + DEBUG_GLOW(llvm::dbgs() << "visit " << Inst->getName() << "\n"); + F_->eraseInstruction(Inst); +} + +/* Dispatches every instruction of F_ to an ExpandVisitor. The iterator is advanced before each visit because the visitors erase the instruction they handle. */ void BM1880ExpandSophonInst::run() { + std::unique_ptr visitor( + new ExpandVisitor(F_, allocationsInfo_)); + auto &instrs = F_->getInstrs(); + for (auto it = instrs.begin(), e = instrs.end(); it != e;) { + auto &I = *it; + it++; + visitor->accept_helper(&I); + } +} diff --git a/lib/Backends/Sophon/BM188x/BM1880ExpandSophonInst.h b/lib/Backends/Sophon/BM188x/BM1880ExpandSophonInst.h new file mode 100644 index 0000000000..799fee5db7 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880ExpandSophonInst.h @@ -0,0 +1,20 @@ +#pragma once + +#include "Backends/Sophon/BM188x/BM1880AllocationsInfo.h" +#include "glow/IR/IR.h" +#include "glow/IR/Instrs.h" + +namespace glow { +/* Pass object holding the function to rewrite and a reference to its allocation info; run() performs the expansion. */ class BM1880ExpandSophonInst { + +public: + BM1880ExpandSophonInst(IRFunction *F, + const BM1880AllocationsInfo &allocationsInfo) + : F_(F), allocationsInfo_(allocationsInfo) {} + void run(); + +private: + IRFunction *F_; + const BM1880AllocationsInfo &allocationsInfo_; +}; +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880HandleReshapePass.cpp b/lib/Backends/Sophon/BM188x/BM1880HandleReshapePass.cpp new file mode 100644 index 0000000000..93c82f5c31 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880HandleReshapePass.cpp @@ -0,0 +1,89 @@ +#include "BM1880HandleReshapePass.h" +#include "Backends/Sophon/AllocationsInfo.h" +#include "Backends/Sophon/GlowLIRVisitor.h" +#include "glow/IR/IRBuilder.h" +#include "glow/IR/IRUtils.h" +#include "glow/Support/Debug.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +using namespace glow; + +#define DEBUG_TYPE "GenLS" + +/* LIR visitor that rewrites TensorViewInst; F_ is assigned by runOnIRFunction before any visit. */ class HandleReshape : public GlowLIRVisitor { +public: + void runOnIRFunction(IRFunction *F); + +private: + void visit(TensorViewInst *tv) override; +
+ /* every instruction other than TensorViewInst is left untouched */ void default_method(glow::Instruction *Inst) override {} + + IRFunction *F_; +}; + +/* Replaces a TensorViewInst whose origin passes the isa check below. The origin is stored into a fresh mutable weight (also registered in the variable map under a "save_<name>_sophon_spill" placeholder), a new activation of the view's type is allocated, and it is loaded from that weight. Users that read the view as an In operand are re-pointed at the new activation, then the view is erased. */ void HandleReshape::visit(TensorViewInst *tv) { + IRBuilder builder(F_); + // auto get_use = [&](glow::Instruction *Inst) + auto origin = getOrigin(tv); + // we only handle local reshape + if (not llvm::isa(origin)) + return; + + std::string name = std::string(tv->getName()); + // gen tmp weight + auto *W = builder.createWeightVar(origin->getType(), name + ".spill", + WeightVar::MutabilityKind::Mutable); + auto *PH = F_->getGraph()->getParent()->createPlaceholder( + origin->getType(), + llvm::StringRef(std::string("save_") + name + "_sophon_spill"), false); + // update variable map + F_->getVariableMap()[PH] = W; + + // insert store + auto *S = builder.createSophonStoreInst(name, W, origin); + F_->moveInstruction(tv, S); + // insert local tensor + auto *A = builder.createAllocActivationInst(name, tv->getType()); + F_->moveInstruction(tv, A); + // insert load + auto *L = builder.createSophonLoadInst(name, A, W); + F_->moveInstruction(tv, L); + { + /* the user list is taken by value; presumably so that setOperand cannot disturb the iteration */ auto users = tv->getUsers(); + for (auto it = users.begin(), e = users.end(); it != e;) { + auto &user = *it; + it++; + if (user.getOperand().second == OperandKind::In) + user.setOperand(A); + } + } + // delete tensorView + F_->eraseInstruction(tv); +} + +/* Records F and visits every instruction; the iterator is advanced first because visit() may erase the current instruction. */ void HandleReshape::runOnIRFunction(IRFunction *F) { + F_ = F; + + auto &instrs = F->getInstrs(); + for (auto it = instrs.begin(), e = instrs.end(); it != e;) { + auto &I = *it; + it++; + accept_helper(&I); + } +} + +namespace glow { +namespace sophon { +/* Entry point of the pass: runs a HandleReshape visitor over F. */ void runHandleReshape(IRFunction *F) { + auto p = llvm::make_unique(); + p->runOnIRFunction(F); +} + +} // namespace sophon +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880HandleReshapePass.h b/lib/Backends/Sophon/BM188x/BM1880HandleReshapePass.h new file mode 100644 index 0000000000..0a9cc07cb6 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880HandleReshapePass.h @@ -0,0 +1,9 @@ +#pragma once + +#include
"glow/IR/IR.h" + +namespace glow { +namespace sophon { +void runHandleReshape(IRFunction *F); +} +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880InsertLoadStorePass.cpp b/lib/Backends/Sophon/BM188x/BM1880InsertLoadStorePass.cpp new file mode 100644 index 0000000000..4303f6cd1b --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880InsertLoadStorePass.cpp @@ -0,0 +1,81 @@ +#include "BM1880InsertLoadStorePass.h" +#include "Backends/Sophon/AllocationsInfo.h" +#include "Backends/Sophon/GlowLIRVisitor.h" +#include "glow/IR/IRBuilder.h" +#include "glow/Support/Debug.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +using namespace glow; + +#define DEBUG_TYPE "GenLS" + +/* LIR visitor that wraps instruction operands in explicit SophonLoadInst / SophonStoreInst pairs; load and store instructions themselves are skipped. */ class InsertLoadStorePass : public GlowLIRVisitor { +public: + void runOnIRFunction(IRFunction *F); + +private: + void visit(SophonLoadInst *Inst) override; + void visit(SophonStoreInst *Inst) override; + + /* For each operand that does not pass the isa check below: an In operand gets a new activation plus a load from the old operand into it; an Out operand gets a new activation plus a store from it back to the old operand. The instruction is re-pointed at the new activation. Operands of any other kind are left as they are. */ void default_method(glow::Instruction *Inst) override { + IRBuilder builder(F_); + for (unsigned i = 0; i < Inst->getNumOperands(); i++) { + const auto &op = Inst->getOperand(i); + if (llvm::isa(op.first)) + continue; + std::string allocate_name = + std::string(Inst->getName()) + "_" + std::string(op.first->getName()); + if (op.second == OperandKind::In) { + auto *A = builder.createAllocActivationInst(allocate_name, + op.first->getType()); + auto *L = + builder.createSophonLoadInst(allocate_name + "_load", A, op.first); + Inst->setOperand(i, A); + F_->moveInstruction(Inst, L); + F_->moveInstruction(L, A); + } else if (op.second == OperandKind::Out) { + auto *A = builder.createAllocActivationInst(allocate_name, + op.first->getType()); + auto *S = builder.createSophonStoreInst(allocate_name + "_store", + op.first, A); + Inst->setOperand(i, A); + F_->moveInstruction(Inst, S); + F_->moveInstruction(S, Inst); + F_->moveInstruction(Inst, A); + } + } + } + + IRFunction *F_; +}; + +// avoid adding load
for load +void InsertLoadStorePass::visit(SophonLoadInst *Inst) { (void)Inst; } +// avoid adding load for store +void InsertLoadStorePass::visit(SophonStoreInst *Inst) { (void)Inst; } + +void InsertLoadStorePass::runOnIRFunction(IRFunction *F) { + F_ = F; + auto &instrs = F->getInstrs(); + for (auto it = instrs.begin(), e = instrs.end(); it != e;) { + auto cur = it; + auto &I = *it; + it++; + accept_helper(&I); + } +} + +namespace glow { +namespace sophon { +void runInsertLoadStorePass(IRFunction *F) { + auto p = llvm::make_unique(); + p->runOnIRFunction(F); +} + +} // namespace sophon +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880InsertLoadStorePass.h b/lib/Backends/Sophon/BM188x/BM1880InsertLoadStorePass.h new file mode 100644 index 0000000000..9db423bc8a --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880InsertLoadStorePass.h @@ -0,0 +1,9 @@ +#pragma once + +#include "glow/IR/IR.h" + +namespace glow { +namespace sophon { +void runInsertLoadStorePass(IRFunction *F); +} +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880Instr.def b/lib/Backends/Sophon/BM188x/BM1880Instr.def new file mode 100644 index 0000000000..ff66968feb --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880Instr.def @@ -0,0 +1,15 @@ +#ifndef DEF_BACKEND_SPECIFIC_INSTR +#error The macro DEF_BACKEND_SPECIFIC_INSTR was not declared. 
+#endif +DEF_BACKEND_SPECIFIC_INSTR(SophonMIGDMAGlobalToLocalInst, sophonmigdmaglobaltolocal) +DEF_BACKEND_SPECIFIC_INSTR(SophonMIGDMALocalToGlobalInst, sophonmigdmalocaltoglobal) +DEF_BACKEND_SPECIFIC_INSTR(SophonMIReluQ8Inst, sophonmireluq8) +DEF_BACKEND_SPECIFIC_INSTR(SophonMIMacConstQ8Inst, sophonmimacconstq8) +DEF_BACKEND_SPECIFIC_INSTR(SophonMIMulConstQ8Inst, sophonmimulconstq8) +DEF_BACKEND_SPECIFIC_INSTR(SophonMIMulConstQ16Inst, sophonmimulconstq16) +DEF_BACKEND_SPECIFIC_INSTR(SophonMIAvgPoolingQ8Inst, sophonmiavgpoolingq8) +DEF_BACKEND_SPECIFIC_INSTR(SophonMIMaxPoolingQ8Inst, sophonmimaxpoolingq8) +DEF_BACKEND_SPECIFIC_INSTR(SophonMIConvolutionQ8Inst, sophonmiconvolutionq8) +DEF_BACKEND_SPECIFIC_INSTR(SophonMIDepthwiseConvolutionQ8Inst, sophonmidepthwiseconvolutionq8) +DEF_BACKEND_SPECIFIC_INSTR(SophonMIFCQ8Inst, sophonmifcq8) +DEF_BACKEND_SPECIFIC_INSTR(SophonMIFCQ16Inst, sophonmifcq16) diff --git a/lib/Backends/Sophon/BM188x/BM1880MemoryAllocPass.cpp b/lib/Backends/Sophon/BM188x/BM1880MemoryAllocPass.cpp new file mode 100644 index 0000000000..dbc830ce0a --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880MemoryAllocPass.cpp @@ -0,0 +1,41 @@ +#include "BM1880MemoryAllocPass.h" +#include "BM1880AllocationsInfo.h" +#include "Backends/Sophon/AllocationsInfo.h" +#include "glow/IR/IRBuilder.h" +#include "glow/Support/Debug.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +using namespace glow; + +#define DEBUG_TYPE "BM1880_mem_alloc" + +/* Pass object that asks the given BM1880AllocationsInfo to allocate weight variables and then activations for a function. The pointer is stored, not owned. */ class MemoryAllocPass { +public: + /* explicit: a bare BM1880AllocationsInfo pointer must not convert to a pass implicitly */ explicit MemoryAllocPass(BM1880AllocationsInfo *allocInfo) : _allocInfo(allocInfo) {} + + void runOnIRFunction(IRFunction *F); + +private: + BM1880AllocationsInfo *_allocInfo; +}; + +void MemoryAllocPass::runOnIRFunction(IRFunction *F) { + DEBUG_GLOW(llvm::dbgs() << "MemoryAllocPass::runOnIRFunction\n"); + _allocInfo->allocateWeightVars(F); + _allocInfo->allocateActivations(F); +} + +namespace glow { +namespace
sophon { +/* Entry point of the pass: runs a MemoryAllocPass bound to allocInfo over F. */ void runMemoryAllocPass(IRFunction *F, BM1880AllocationsInfo *allocInfo) { + auto p = llvm::make_unique(allocInfo); + p->runOnIRFunction(F); +} + +} // namespace sophon +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880MemoryAllocPass.h b/lib/Backends/Sophon/BM188x/BM1880MemoryAllocPass.h new file mode 100644 index 0000000000..8baf61ca21 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880MemoryAllocPass.h @@ -0,0 +1,10 @@ +#pragma once + +#include "Backends/Sophon/BM188x/BM1880AllocationsInfo.h" +#include "glow/IR/IR.h" + +namespace glow { +namespace sophon { +void runMemoryAllocPass(IRFunction *F, BM1880AllocationsInfo *allocInfo); +} +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880TargetTransformInfo.cpp b/lib/Backends/Sophon/BM188x/BM1880TargetTransformInfo.cpp new file mode 100644 index 0000000000..4b6474ac60 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880TargetTransformInfo.cpp @@ -0,0 +1,84 @@ +#include "BM1880TargetTransformInfo.h" +#include "BM188xLMemSizeVisitor.h" +#include + +namespace glow { +namespace sophon { +/* Returns the address of a function-local static instance, so every caller shares one object. */ BM1880TargetTransformInfo *BM1880TargetTransformInfo::getInstance() { + static BM1880TargetTransformInfo instance; + return &instance; +} + +BM1880TargetTransformInfo::BM1880TargetTransformInfo() {} + +int BM1880TargetTransformInfo::getLocalMemSizeInBytes() const { + return 64 * 1024; +} + +int BM1880TargetTransformInfo::getTPUNum() const { return 1; } + +int BM1880TargetTransformInfo::getNPUNum() const { return 32; } + +int BM1880TargetTransformInfo::getEUNum() const { return 16; } + +/* Returns the first non-zero size that a user of value reports for the operand slot value occupies in it, or 0 when no user reports one. */ size_t +BM1880TargetTransformInfo::getLMemSizeFromValue(glow::Value *value) const { + BM188xLMemSizeVisitor visitor; + size_t lmem_size = 0; + + for (auto &use : value->getUsers()) { + + auto *instr = use.get(); + + visitor.accept_helper(instr); + auto opnd_size = visitor.getResult(); + auto opnd_idx = use.idx_; + + if (opnd_idx < opnd_size.size()) { + lmem_size = opnd_size.at(opnd_idx); + } + + if (lmem_size >
0) { + break; + } + } + return lmem_size; +} + +// only conv.weight, conv.bias, fc.bias can be eu-unaligned +/* The first non-Out user that matches one of the two dyn_cast checks decides the answer; when none matches, the activation is reported as aligned. */ bool BM1880TargetTransformInfo::isEUAligned( + const glow::AllocActivationInst *Inst) const { + + for (const auto &use : Inst->getUsers()) { + const auto *instr = use.get(); + // try to find input user, not output user + if (use.getOperand().second == OperandKind::Out) + continue; + + if (auto *conv = + llvm::dyn_cast(instr)) { + // conv.weight or conv.bias + if (use.idx_ == 2 || use.idx_ == 3) + return false; + return true; + } else if (auto *fc = + llvm::dyn_cast( + instr)) { + // fc.bias + if (use.idx_ == 3) + return false; + return true; + } + } + return true; +} + +/* Runs a fresh BM188xLMemSizeVisitor on Inst and returns its per-operand result. */ std::vector +BM1880TargetTransformInfo::getLMemSizeFromInst(glow::Instruction *Inst) const { + BM188xLMemSizeVisitor visitor; + visitor.accept_helper(Inst); + return visitor.getResult(); +} + +} // namespace sophon +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM1880TargetTransformInfo.h b/lib/Backends/Sophon/BM188x/BM1880TargetTransformInfo.h new file mode 100644 index 0000000000..ae641dfa40 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM1880TargetTransformInfo.h @@ -0,0 +1,33 @@ +#ifndef BM1880_TARGET_TRANSFORM_INFO_H +#define BM1880_TARGET_TRANSFORM_INFO_H + +#include "Backends/Sophon/SophonTargetTransformInfo.h" +namespace glow { +namespace sophon { + +/* Target description for BM1880, obtained through getInstance(); copy construction and copy assignment are deleted. */ class BM1880TargetTransformInfo : public SophonTargetTransformInfo { +public: + static BM1880TargetTransformInfo *getInstance(); + BM1880TargetTransformInfo(BM1880TargetTransformInfo const &) = delete; + void operator=(BM1880TargetTransformInfo const &) = delete; + +public: + size_t getLMemSizeFromValue(glow::Value *value) const override; + bool isEUAligned(const glow::AllocActivationInst *Inst) const override; + + std::vector + getLMemSizeFromInst(glow::Instruction *Inst) const override; + + int getLocalMemSizeInBytes() const override; + int getTPUNum() const override; + int getNPUNum() const override; + int getEUNum()
const override; + +private: + BM1880TargetTransformInfo(); +}; + +} // namespace sophon +} // namespace glow + +#endif // BM1880_TARGET_TRANSFORM_INFO_H diff --git a/lib/Backends/Sophon/BM188x/BM188xLMemSizeVisitor.cpp b/lib/Backends/Sophon/BM188x/BM188xLMemSizeVisitor.cpp new file mode 100644 index 0000000000..0176ec5326 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM188xLMemSizeVisitor.cpp @@ -0,0 +1,256 @@ +#define DEBUG_TYPE "BM188xLMemSizeVisitor" + +#include "BM188xLMemSizeVisitor.h" +#include "BM1880TargetTransformInfo.h" +#include "Backends/Sophon/Utility/memory.h" +#include "glow/IR/IRUtils.h" +#include "glow/IR/Instrs.h" +#include "glow/Support/Debug.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using llvm::cast; +using llvm::dyn_cast; + +namespace glow { +namespace sophon { +/* Caches the NPU and EU counts reported by BM1880TargetTransformInfo. */ BM188xLMemSizeVisitor::BM188xLMemSizeVisitor() { + npu_num_ = + glow::sophon::BM1880TargetTransformInfo::getInstance()->getNPUNum(); + eu_num_ = glow::sophon::BM1880TargetTransformInfo::getInstance()->getEUNum(); +} + +/* Size of a fully-connected bias: the value's dims are converted with SophonFCBiasDim and fed to the non-aligned formula. */ size_t BM188xLMemSizeVisitor::calFCBias(glow::Value *value) { + auto orig_dim = value->getType()->dims(); + SophonFCBiasDim dim(orig_dim); + size_t sz = + glow::sophon::formula_4d_nonaligned(dim.n, dim.c, dim.h, dim.w, npu_num_); + return sz; +} + +/* Size of value under the aligned formula, which takes both the NPU and the EU count. */ size_t BM188xLMemSizeVisitor::calAligned(glow::Value *value) { + auto orig_dim = value->getType()->dims(); + glow::sophon::SophonDim dim(orig_dim); + size_t sz = glow::sophon::formula_4d_aligned(dim.n, dim.c, dim.h, dim.w, + npu_num_, eu_num_); + return sz; +} + +/* Size of value under the non-aligned formula, which takes only the NPU count. */ size_t BM188xLMemSizeVisitor::calNonAligned(glow::Value *value) { + auto orig_dim = value->getType()->dims(); + glow::sophon::SophonDim dim(orig_dim); + size_t sz = + glow::sophon::formula_4d_nonaligned(dim.n, dim.c, dim.h, dim.w, npu_num_); + return sz; +} + +template +std::vector BM188xLMemSizeVisitor::calGeneralAligned(T *inst) { + std::vector operand_size; + unsigned opnd_num = inst->getNumOperands(); + for
(unsigned i = 0; i < opnd_num; i++) { + auto *value = inst->getOperand(i).first; + size_t sz = calAligned(value); + operand_size.push_back(sz); + } + return operand_size; +} + +/* Appends the aligned or non-aligned size of operand opnd_id of inst to result. */ template +void BM188xLMemSizeVisitor::calOperand(T *inst, std::vector *result, + const int opnd_id, bool aligned) { + size_t sz; + auto *value = inst->getOperand(opnd_id).first; + if (aligned) + sz = calAligned(value); + else + sz = calNonAligned(value); + result->push_back(sz); +} + +/* Recursive step: handles operand ID with the first flag, then recurses on the remaining flags (the template arguments of the recursive call are not visible here; presumably ID + 1). */ template +void BM188xLMemSizeVisitor::calEach(T *inst, std::vector *result, + const Type &oprnd, + const Types &... oprnds) { + + calOperand(inst, result, ID, oprnd); + calEach(inst, result, oprnds...); +} + +/* Final step of the recursion: a single flag is left. */ template +void BM188xLMemSizeVisitor::calEach(T *inst, std::vector *result, + const Type &oprnd) { + calOperand(inst, result, ID, oprnd); +} + +/* Returns one size per flag passed: each flag selects the aligned (true) or non-aligned (false) formula for one operand, starting at operand 0. */ template +std::vector +BM188xLMemSizeVisitor::calOperandList(T *inst, const Type &oprnd, + const Types &... oprnds) { + std::vector operand_size; + calEach<0>(inst, &operand_size, oprnd, oprnds...); + return operand_size; +} + +/* Fallback for instruction kinds without a dedicated visit: clears the result and asserts. */ void BM188xLMemSizeVisitor::default_method(glow::Instruction *Inst) { + DEBUG_GLOW(llvm::dbgs() << "unsupport Instr " << Inst->getKindName() << " " + << Inst->getName() << "\n"); + oprnd_size_.clear(); + assert(false && "getLMemSize: Unsupport Instrution"); +} + +// +// SophonOp +// +void BM188xLMemSizeVisitor::visit(glow::SophonReluQ8Inst *Inst) { + oprnd_size_ = calGeneralAligned(Inst); +} + +void BM188xLMemSizeVisitor::visit(glow::SophonAvgPoolQ8Inst *Inst) { + oprnd_size_ = calGeneralAligned(Inst); +} + +void BM188xLMemSizeVisitor::visit(glow::SophonMaxPoolQ8Inst *Inst) { + oprnd_size_ = calGeneralAligned(Inst); +} + +void BM188xLMemSizeVisitor::visit(glow::SophonConvolutionQ8Inst *Inst) { + oprnd_size_ = calOperandList(Inst, ALIGN, ALIGN, /* weight */ NONALIGN, + /* bias */ NONALIGN); +} + +void BM188xLMemSizeVisitor::visit(glow::SophonFullyConnectedQ8Inst *Inst) { + oprnd_size_ = calOperandList(Inst, ALIGN, ALIGN, /* weight */ ALIGN);
+ // handle special case: fc bias + size_t sz_bias = calFCBias(Inst->getOperand(3).first); + oprnd_size_.push_back(sz_bias); +} + +size_t BM188xLMemSizeVisitor::calValueSize(glow::Value *Value, + bool calByOutUser) { + for (auto &use : Value->getUsers()) { + + if (calByOutUser && use.getOperand().second != OperandKind::Out) { + continue; + } + + // ignore Out user + if (!calByOutUser && use.getOperand().second == OperandKind::Out) { + continue; + } + auto *instr = use.get(); + accept_helper(instr); + auto opnd_size = getResult(); + auto opnd_idx = use.idx_; + + if (opnd_idx < opnd_size.size()) { + size_t lmem_size = opnd_size.at(opnd_idx); + if (lmem_size > 0) { + return lmem_size; + } + } + } +} + +void BM188xLMemSizeVisitor::visit(glow::SophonLoadInst *Inst) { + // load dest, src + // load global to dest local + + // dest size depend by memory shape + size_t dest_size = calValueSize(Inst->getDest()); + + // src size depend by tensor shape + auto *w = cast(Inst->getSrc()); + size_t src_size = w->getSizeInBytes(); + + // clear oprnd_size_ used by calValueSize + oprnd_size_.clear(); + oprnd_size_.push_back(dest_size); + oprnd_size_.push_back(src_size); +} + +void BM188xLMemSizeVisitor::visit(glow::SophonStoreInst *Inst) { + // store dest, src + // store local to global + + // dest size depend by tensor shape + auto *w = cast(Inst->getDest()); + size_t dest_size = w->getSizeInBytes(); + + // src size depend by memory shape + // because the input user of store Inst is itself. 
+ // we need to use output user to decide which value to calculate size + size_t src_size = calValueSize(getOrigin(Inst->getSrc()), true); + + // clear oprnd_size_ used by calValueSize + oprnd_size_.clear(); + oprnd_size_.push_back(dest_size); + oprnd_size_.push_back(src_size); +} + +// +// SophonMI +// +/* The two GDMA instructions report no operand sizes: the result list is left empty. */ void BM188xLMemSizeVisitor::visit(glow::SophonMIGDMAGlobalToLocalInst *Inst) { + oprnd_size_.clear(); +} + +void BM188xLMemSizeVisitor::visit(glow::SophonMIGDMALocalToGlobalInst *Inst) { + oprnd_size_.clear(); +} + +/* The following instructions size every operand with the aligned formula. */ void BM188xLMemSizeVisitor::visit(glow::SophonMIMulConstQ16Inst *Inst) { + oprnd_size_ = calGeneralAligned(Inst); +} + +void BM188xLMemSizeVisitor::visit(glow::SophonMIMulConstQ8Inst *Inst) { + oprnd_size_ = calGeneralAligned(Inst); +} + +void BM188xLMemSizeVisitor::visit(glow::SophonMIMacConstQ8Inst *Inst) { + oprnd_size_ = calGeneralAligned(Inst); +} + +void BM188xLMemSizeVisitor::visit(glow::SophonMIReluQ8Inst *Inst) { + oprnd_size_ = calGeneralAligned(Inst); +} + +void BM188xLMemSizeVisitor::visit(glow::SophonMIAvgPoolingQ8Inst *Inst) { + oprnd_size_ = calGeneralAligned(Inst); +} + +void BM188xLMemSizeVisitor::visit(glow::SophonMIMaxPoolingQ8Inst *Inst) { + oprnd_size_ = calGeneralAligned(Inst); +} + +/* Convolution: operands 0 and 1 aligned, weight and bias non-aligned. */ void BM188xLMemSizeVisitor::visit(glow::SophonMIConvolutionQ8Inst *Inst) { + oprnd_size_ = calOperandList(Inst, ALIGN, ALIGN, /* weight */ NONALIGN, + /* bias */ NONALIGN); +} + +/* Depthwise convolution: only the bias is non-aligned. */ void BM188xLMemSizeVisitor::visit( + glow::SophonMIDepthwiseConvolutionQ8Inst *Inst) { + oprnd_size_ = calOperandList(Inst, ALIGN, ALIGN, /* weight */ ALIGN, + /* bias */ NONALIGN); +} + +/* Fully connected: three aligned operands, then the bias (operand 3) sized by calFCBias. */ void BM188xLMemSizeVisitor::visit(glow::SophonMIFCQ8Inst *Inst) { + oprnd_size_ = calOperandList(Inst, ALIGN, ALIGN, /* weight */ ALIGN); + // handle special case: fc bias + size_t sz_bias = calFCBias(Inst->getOperand(3).first); + oprnd_size_.push_back(sz_bias); +} + +void BM188xLMemSizeVisitor::visit(glow::SophonMIFCQ16Inst *Inst) { + oprnd_size_ = calOperandList(Inst, ALIGN, ALIGN, /*
weight */ ALIGN); + // handle special case: fc bias + size_t sz_bias = calFCBias(Inst->getOperand(3).first); + oprnd_size_.push_back(sz_bias); +} + +/* Returns a copy of the sizes computed by the most recent visit. */ std::vector BM188xLMemSizeVisitor::getResult() { return oprnd_size_; } + +} // namespace sophon +} // namespace glow diff --git a/lib/Backends/Sophon/BM188x/BM188xLMemSizeVisitor.h b/lib/Backends/Sophon/BM188x/BM188xLMemSizeVisitor.h new file mode 100644 index 0000000000..cb98ddc9f5 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/BM188xLMemSizeVisitor.h @@ -0,0 +1,72 @@ +#ifndef BM188X_LMEMSIZE_VISITOR_H +#define BM188X_LMEMSIZE_VISITOR_H + +#include "Backends/Sophon/GlowLIRVisitor.h" +#include "glow/IR/IR.h" + +namespace glow { +namespace sophon { +/* Visitor that computes, per instruction kind, one size per operand; each visit overwrites the stored result, which is read back through getResult(). */ class BM188xLMemSizeVisitor : public glow::GlowLIRVisitor { +public: + BM188xLMemSizeVisitor(); + + template std::vector calGeneralAligned(T *inst); + size_t calAligned(glow::Value *value); + size_t calNonAligned(glow::Value *value); + size_t calFCBias(glow::Value *value); + + // GlowOp + + // SophonOp + void visit(glow::SophonReluQ8Inst *Inst) override; + void visit(glow::SophonAvgPoolQ8Inst *Inst) override; + void visit(glow::SophonMaxPoolQ8Inst *Inst) override; + void visit(glow::SophonConvolutionQ8Inst *Inst) override; + void visit(glow::SophonFullyConnectedQ8Inst *Inst) override; + void visit(glow::SophonLoadInst *Inst) override; + void visit(glow::SophonStoreInst *Inst) override; + + // SophonMI + void visit(glow::SophonMIGDMAGlobalToLocalInst *Inst) override; + void visit(glow::SophonMIGDMALocalToGlobalInst *Inst) override; + void visit(glow::SophonMIMulConstQ16Inst *Inst) override; + void visit(glow::SophonMIMulConstQ8Inst *Inst) override; + void visit(glow::SophonMIMacConstQ8Inst *Inst) override; + void visit(glow::SophonMIReluQ8Inst *Inst) override; + void visit(glow::SophonMIAvgPoolingQ8Inst *Inst) override; + void visit(glow::SophonMIMaxPoolingQ8Inst *Inst) override; + void visit(glow::SophonMIConvolutionQ8Inst *Inst) override; + void
visit(glow::SophonMIDepthwiseConvolutionQ8Inst *Inst) override; + void visit(glow::SophonMIFCQ8Inst *Inst) override; + void visit(glow::SophonMIFCQ16Inst *Inst) override; + void default_method(glow::Instruction *Inst) override; + std::vector getResult(); + +private: + template + std::vector calOperandList(T *inst, const Type &oprnd, + const Types &... oprnds); + template + void calOperand(T *inst, std::vector *result, const int opnd_id, + bool aligned); + template + void calEach(T *inst, std::vector *result, const Type &oprnd, + const Types &... oprnds); + template + void calEach(T *inst, std::vector *result, const Type &oprnd); + + // By default the non-Out users of Value decide its size; with calByOutUser set, only its Out users are consulted. + size_t calValueSize(glow::Value *Value, bool calByOutUser = false); + +private: + /* flags passed to calOperandList to pick the formula per operand */ const bool NONALIGN = false; + const bool ALIGN = true; + unsigned npu_num_; + unsigned eu_num_; + /* result of the most recent visit, one entry per operand */ std::vector oprnd_size_; +}; + +} // namespace sophon +} // namespace glow + +#endif // BM188X_LMEMSIZE_VISITOR_H diff --git a/lib/Backends/Sophon/BM188x/CMakeLists.txt b/lib/Backends/Sophon/BM188x/CMakeLists.txt new file mode 100644 index 0000000000..9ed7b79d13 --- /dev/null +++ b/lib/Backends/Sophon/BM188x/CMakeLists.txt @@ -0,0 +1,13 @@ + +add_libbmnet_glow( + BM1880AllocationsInfo.cpp + BM1880Backend.cpp + BM1880CodeGenBMK.cpp + BM1880CodeGen.cpp + BM1880DumpAllPass.cpp + BM1880ExpandSophonInst.cpp + BM1880HandleReshapePass.cpp + BM1880InsertLoadStorePass.cpp + BM1880MemoryAllocPass.cpp + BM1880TargetTransformInfo.cpp + BM188xLMemSizeVisitor.cpp) diff --git a/lib/Backends/Sophon/Bundle.cpp b/lib/Backends/Sophon/Bundle.cpp new file mode 100644 index 0000000000..5f34ac3aa6 --- /dev/null +++ b/lib/Backends/Sophon/Bundle.cpp @@ -0,0 +1,169 @@ +/* + * bmnet/lib/Backends/Sophon/Bundle.cpp + * + * Copyright Bitmain Technologies Inc.
+ * Written by: + * Wanwei CAI + * Created Time: 2018-10-13 17:24 + */ + +#define DEBUG_TYPE "bundle_saver" + +#include "Bundle.h" +#include "CommandLine.h" +#include "SophonBackend.h" + +#include "glow/Graph/Graph.h" +#include "glow/IR/Instrs.h" +#include "glow/Support/Debug.h" + +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" + +#include + +using namespace glow; +using llvm::cast; +using llvm::dyn_cast; +using llvm::isa; + +Bundle::Bundle(const SophonBackend *backend, AllocationsInfo &allocationsInfo) + : backend_(backend), allocationsInfo_(allocationsInfo) {} + +/* Appends to inputs every placeholder whose name does not contain "save_". */ void Bundle::getInputs(IRFunction *F, InputList &inputs) { + for (auto v : F->getGraph()->getParent()->getPlaceholders()) { + if (v->getName().find("save_") != llvm::StringRef::npos) + continue; + inputs.push_back(v); + } +} + +/* Appends to outputs every placeholder whose name contains "save_" but not "_sophon_spill". */ void Bundle::getOutputs(IRFunction *F, OutputList &outputs) { + for (auto v : F->getGraph()->getParent()->getPlaceholders()) { + if (v->getName().find("save_") == llvm::StringRef::npos) + continue; + if (v->getName().find("_sophon_spill") != llvm::StringRef::npos) + continue; + outputs.push_back(v); + } +} + +/* Maps an element kind to the matching FMT_* constant; any other kind reaches llvm_unreachable. Within the code visible here it is referenced only from commented-out calls. */ static uint32_t glueGetFmt(ElemKind kind) { + switch (kind) { + case ElemKind::FloatTy: + return FMT_F32; + case ElemKind::Float16Ty: + return FMT_F16; + case ElemKind::Int8QTy: + return FMT_I8; + case ElemKind::Int16QTy: + return FMT_I16; + case ElemKind::Int32QTy: + return FMT_I32; + default: + llvm_unreachable("type not support now"); + break; + } +} + +/* Assembles the bmodel for F: net name, chip and default format, one command carrying the inputs, outputs, neuron size and command buffer, plus one weight blob. */ std::unique_ptr Bundle::produceBmodel(IRFunction *F) { + + std::unique_ptr model(new bmodel::Model()); + float threshold = 0.0f; + + model->set_net_name(F->getGraph()->getName()); + uint32_t target = backend_->getTarget(); + model->set_chip(target); + // fmt here is default value + switch (target) { + case 1682: + case 1684: + model->set_fmt(FMT_F32); + break; + case 1880: + case 1882: + model->set_fmt(FMT_I8); + break; + default: + llvm_unreachable("chip not support now"); + break; + } + + // inputs +
InputList inputs; + getInputs(F, inputs); + + auto command = model->add_command(); + for (auto &v : inputs) { // support multi input + auto input = command->add_input(); + for (auto &dim : v->getType()->dims()) { + input->mutable_shape()->add_dim(dim); + input->set_threshold(threshold); // 1880 need; NOTE(review): runs once per dimension although threshold never changes + // input->set_fmt(glueGetFmt(v->getElementType())); + } + } + + // outputs: each one records its name, its allocated address as offset, and its shape + OutputList outputs; + getOutputs(F, outputs); + for (auto v : outputs) { + auto *o = cast(F->getWeightForNode(v)); + size_t output_offset = allocationsInfo_.getAllocatedAddress()[o]; + auto output = command->add_output(); + output->set_name(v->getName()); + // output->set_threshold(0.0f); // 1880 need + output->set_offset(output_offset); + auto dimSize = v->getType()->dims().size(); + if (dimSize != 4 && dimSize != 2) { + llvm_unreachable("Unsupported output shape"); + } + for (auto dim : v->getType()->dims()) { + output->mutable_shape()->add_dim(dim); + } + // output->set_fmt(glueGetFmt(v->getElementType())); + } + + size_t total_neuron_size = allocationsInfo_.getActivationsMemSize(); + command->set_neuron_size(total_neuron_size); + *(command->mutable_cmdbuf()) = {cmdbuf_.begin(), cmdbuf_.end()}; + auto weight = model->add_weight(); + *weight = {u8_weights_.begin(), u8_weights_.end()}; + +#if 0 // for debug + std::string file("/tmp/glow.cmdbuf"); + bmnet::WriteFloatDataToBinaryFile(cg.getCmdbuf().data(), cg.getCmdbuf().size(), file); + std::string file1("/tmp/glow.weight"); + bmnet::WriteFloatDataToBinaryFile(weight_u8.data(), weight_u8.size(), file1); +#endif + +#if 0 + // cpu layer here + findCpuLayers(cpu_layers); + VLOG(2) << "cpu_layers: " << cpu_layers.size(); +#endif + + return model; +} +/* Saves model to outputDir followed directly by the bmodel option value (no separator is inserted). Failure is only caught by assert, so ret is unused when NDEBUG is defined. */ +void Bundle::saveBmodelFile(std::unique_ptr model, + const std::string &outputDir) { + assert(model != nullptr); + DEBUG_GLOW(bmodel::print(*model)); + + auto fileName = outputDir + bmodelFileNameOpt; + bmerr_t ret = bmodel::save(*model, fileName); + assert(ret == BM_SUCCESS); + +
DEBUG_GLOW(llvm::dbgs() << "Save bmodel to: " << fileName << "\n"); +} +/* Fills u8_weights_ and cmdbuf_ through the backend, then packs them into a bmodel. */ +std::unique_ptr Bundle::codegen(IRFunction *F) { + // generate weight + backend_->generateWeights(F, allocationsInfo_, u8_weights_); + + // generate cmdbuf + backend_->codeGenCmdbuf(F, allocationsInfo_, cmdbuf_); + + auto model = produceBmodel(F); + return model; +} diff --git a/lib/Backends/Sophon/Bundle.h b/lib/Backends/Sophon/Bundle.h new file mode 100644 index 0000000000..8281ab267d --- /dev/null +++ b/lib/Backends/Sophon/Bundle.h @@ -0,0 +1,57 @@ +/* + * bmnet/lib/Backends/Sophon/Bundle.h + * + * Copyright Bitmain Technologies Inc. + * Written by: + * Wanwei CAI + * Created Time: 2018-10-13 17:22 + */ + +#ifndef BMNET_BACKENDS_SOPHON_BUNDLESAVER_H +#define BMNET_BACKENDS_SOPHON_BUNDLESAVER_H + +#include "AllocationsInfo.h" +#include "SophonBackend.h" +#include "glow/IR/IR.h" +#include + +namespace glow { + +class Bundle final { +public: + explicit Bundle(const SophonBackend *backend, + AllocationsInfo &allocationsInfo); + + /// Generate the weights and the command buffer for \p F through the + /// backend and assemble them into a bmodel, which is returned to the + /// caller. Writing the model to disk is done separately by + /// saveBmodelFile(). + std::unique_ptr codegen(IRFunction *F); +/* Save \p model as a file whose path is \p outputDir followed by the bmodel option value. */ + static void saveBmodelFile(std::unique_ptr model, + const std::string &outputDir); + +private: + using InputList = std::vector; + using OutputList = std::vector; + + const SophonBackend *backend_; + /// Information about allocations. + AllocationsInfo &allocationsInfo_; + + SophonCmdBuf cmdbuf_; + std::vector u8_weights_; + +private: + /// Perform IR group optimization. NOTE(review): Bundle.cpp in this patch defines no such member. + void performIRGroup(); + /// Produce a bundle.
+ + std::unique_ptr produceBmodel(IRFunction *F); + + void getInputs(IRFunction *F, InputList &inputs); + void getOutputs(IRFunction *F, OutputList &outputs); +}; +} // namespace glow + +#endif diff --git a/lib/Backends/Sophon/CMakeLists.txt b/lib/Backends/Sophon/CMakeLists.txt new file mode 100644 index 0000000000..fd348ea671 --- /dev/null +++ b/lib/Backends/Sophon/CMakeLists.txt @@ -0,0 +1,58 @@ + + +# Prepare the Sophon external SDK: it is fetched with ExternalProject and no configure/build/install step is run. +# get more info from https://sophon-edge.gitbook.io/project/getting-started/bmnnsdk-installation +include(ExternalProject) +set(Sophon_ext_lib ${CMAKE_CURRENT_BINARY_DIR}/sophon_sdk/src/sophon_sdk/bmtap2-bm1880-usb_1.0.2) +ExternalProject_Add(sophon_sdk + PREFIX sophon_sdk + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + BUILD_BYPRODUCTS + ${Sophon_ext_lib}/lib/cmodel/libbmkernel-static.a + ${Sophon_ext_lib}/lib/cmodel/libbmodel.so + ${Sophon_ext_lib}/lib/cmodel/libbmruntime.so + GIT_REPOSITORY https://github.com/ffk0716/bm1880-bmnnsdk-usb.git + GIT_TAG bm1880-usb_1.0.2.2-hotfix) + +# Sophon Backend: one library target that the per-directory object libraries below are added to. +find_package(Protobuf REQUIRED) +include_directories(SYSTEM ${Protobuf_INCLUDE_DIRS}) +include_directories(SYSTEM ${Sophon_ext_lib}/bmnet_sdk/include) +include_directories(SYSTEM ${Sophon_ext_lib}/bmnet_sdk/install/include) +include_directories(${Glow_SOURCE_DIR}/lib) + +add_library(Sophon "") + +target_link_libraries(Sophon PRIVATE + Backends + CodeGen + Importer + Quantization + ${Sophon_ext_lib}/lib/cmodel/libbmkernel-static.a + ${Sophon_ext_lib}/lib/cmodel/libbmodel.so + ${Sophon_ext_lib}/lib/cmodel/libbmruntime.so) + +set(Sophon_base ${Glow_SOURCE_DIR}/lib/Backends) +function(add_libbmnet_glow) + file(RELATIVE_PATH target_name ${Sophon_base} ${CMAKE_CURRENT_SOURCE_DIR}) + string(REPLACE "/" "_" target_name ${target_name}) + string(REPLACE "-" "_" target_name ${target_name}) + set(target_name ${target_name}_obj) + add_library(${target_name} OBJECT ${ARGN}) + add_dependencies(${target_name} sophon_sdk) +
target_sources(Sophon PRIVATE $) +endfunction() + + +add_libbmnet_glow( + Bundle.cpp + CommandLine.cpp + GlowLIRVisitor.cpp + SophonBackend.cpp + SophonFunction.cpp + SophonQuantizer.cpp) + +add_subdirectory(BM188x) +add_subdirectory(Utility) diff --git a/lib/Backends/Sophon/CommandLine.cpp b/lib/Backends/Sophon/CommandLine.cpp new file mode 100644 index 0000000000..a048746d97 --- /dev/null +++ b/lib/Backends/Sophon/CommandLine.cpp @@ -0,0 +1,17 @@ +#include "CommandLine.h" + +namespace glow { +/* bmodel option: name of the bmodel file to write; the initial value is tmp.bmodel. */ +llvm::cl::opt + bmodelFileNameOpt("bmodel", llvm::cl::desc("Specify file name to bmodel\n"), + llvm::cl::value_desc("bmodelPath"), + llvm::cl::init("tmp.bmodel"), + llvm::cl::cat(SophonBackendCat)); +/* enable-layer-group option: initialised to true. NOTE(review): Enbale in the description string looks like a typo for Enable. */llvm::cl::opt + enableLayerGroupOpt("enable-layer-group", + llvm::cl::desc("Enbale layer group optimization\n"), + llvm::cl::init(true), llvm::cl::cat(SophonBackendCat)); +/* NOTE(review): both options above pass this category to cl::cat before its definition is reached in this translation unit; confirm the ordering is intended. */ +llvm::cl::OptionCategory SophonBackendCat("Sophon Backend Options"); + +} // namespace glow diff --git a/lib/Backends/Sophon/CommandLine.h b/lib/Backends/Sophon/CommandLine.h new file mode 100644 index 0000000000..658106aa77 --- /dev/null +++ b/lib/Backends/Sophon/CommandLine.h @@ -0,0 +1,21 @@ +/* + * bmnet/lib/Backends/Sophon/CommandLine.h + * + * Copyright Bitmain Technologies Inc.
+ * Written by: + * Wanwei CAI + * Created Time: 2018-10-15 09:54 + */ + +#ifndef _COMMANDLINE_H +#define _COMMANDLINE_H +/* NOTE(review): names that start with an underscore followed by an uppercase letter are reserved in C++; consider renaming this include guard. */ +#include "llvm/Support/CommandLine.h" + +namespace glow { +extern llvm::cl::OptionCategory SophonBackendCat; +extern llvm::cl::opt bmodelFileNameOpt; +extern llvm::cl::opt enableLayerGroupOpt; +} // namespace glow + +#endif diff --git a/lib/Backends/Sophon/GlowLIRVisitor.cpp b/lib/Backends/Sophon/GlowLIRVisitor.cpp new file mode 100644 index 0000000000..140014901c --- /dev/null +++ b/lib/Backends/Sophon/GlowLIRVisitor.cpp @@ -0,0 +1,41 @@ +#define DEBUG_TYPE "visitor" +#include "GlowLIRVisitor.h" +#include "glow/Support/Debug.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" + +using namespace glow; + +#define VISIT_INSTR(CLASS) \ + case glow::Kinded::Kind::CLASS##Kind: { \ + this->visit(llvm::cast(Inst)); \ + } break; +/* Dispatches Inst to the visit() overload of its concrete class; the cases are generated from AutoGenInstr.def and an unknown kind reaches llvm_unreachable. */ +void GlowLIRVisitor::accept_helper(glow::Instruction *Inst) { + switch (Inst->getKind()) { +#define DEF_VALUE(CLASS, NAME) +#define DEF_INSTR(CLASS, NAME) VISIT_INSTR(CLASS) +#define DEF_BACKEND_SPECIFIC_INSTR(CLASS, NAME) VISIT_INSTR(CLASS) +#include "glow/AutoGenInstr.def" + default: + llvm_unreachable("Unknown value kind"); + break; + } +} + +#define CONST_VISIT_INSTR(CLASS) \ + case glow::Kinded::Kind::CLASS##Kind: { \ + this->visit(llvm::cast(Inst)); \ + } break; +/* Same dispatch as the overload above, for a const instruction. */ +void GlowLIRVisitor::accept_helper(const glow::Instruction *Inst) { + switch (Inst->getKind()) { +#define DEF_VALUE(CLASS, NAME) +#define DEF_INSTR(CLASS, NAME) CONST_VISIT_INSTR(CLASS) +#define DEF_BACKEND_SPECIFIC_INSTR(CLASS, NAME) CONST_VISIT_INSTR(CLASS) +#include "glow/AutoGenInstr.def" + default: + llvm_unreachable("Unknown value kind"); + break; + } +} diff --git a/lib/Backends/Sophon/GlowLIRVisitor.h b/lib/Backends/Sophon/GlowLIRVisitor.h new file mode 100644 index 0000000000..f5560dd0fb --- /dev/null +++ b/lib/Backends/Sophon/GlowLIRVisitor.h @@ -0,0 +1,30 @@ +#pragma once + +#include "glow/IR/Instrs.h" + +namespace glow { +
+/* Visitor over Glow IR instructions. One visit() pair (const and non-const) is generated per instruction class listed in AutoGenInstr.def, and each generated overload forwards to default_method(). */class GlowLIRVisitor { +public: + GlowLIRVisitor() = default; + virtual ~GlowLIRVisitor() = default; +/* Entry points: call the visit() overload that matches the kind of Inst. */ + void accept_helper(glow::Instruction *Inst); + void accept_helper(const glow::Instruction *Inst); +/* Fallback reached by every generated visit() overload; does nothing here. */ + virtual void default_method(const glow::Instruction *Inst) {} + virtual void default_method(glow::Instruction *Inst) {} + +#define DEF_METHOD(CLASS) \ + virtual void visit(const CLASS *Inst) { default_method(Inst); } \ + virtual void visit(CLASS *Inst) { default_method(Inst); } + +#define DEF_VALUE(CLASS, NAME) +#define DEF_INSTR(CLASS, NAME) DEF_METHOD(CLASS) +#define DEF_BACKEND_SPECIFIC_INSTR(CLASS, NAME) DEF_METHOD(CLASS) +#include "glow/AutoGenInstr.def" + +#undef DEF_METHOD +}; + +} // namespace glow diff --git a/lib/Backends/Sophon/SophonBackend.cpp b/lib/Backends/Sophon/SophonBackend.cpp new file mode 100644 index 0000000000..ac9ce8c8c9 --- /dev/null +++ b/lib/Backends/Sophon/SophonBackend.cpp @@ -0,0 +1,51 @@ +/** + * Copyright (c) 2017-present, Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "SophonBackend.h" +#include "BM188x/BM1880Backend.h" +#include "Bundle.h" +#include "CommandLine.h" +#include "SophonFunction.h" + +using namespace glow; + +llvm::cl::opt + target("target", llvm::cl::desc("Specify Sophon target"), + llvm::cl::values(clEnumValN(Sophon::Target::BM1682, "bm1682", + "Support float precision"), + clEnumValN(Sophon::Target::BM1880, "bm1880", + "Support int8 symmetric precision"), + clEnumValN(Sophon::Target::BM1882, "bm1882", + "Support int8 symmetric precision")), + llvm::cl::init(Sophon::Target::BM1682), + llvm::cl::Optional // TODO: change to Required in the future. + ); + +llvm::cl::opt loadCtable("load_ctable", + llvm::cl::desc("Load calibration file"), + llvm::cl::value_desc("ctable.pb2"), + llvm::cl::Optional); + +namespace glow { +/* Creates the backend selected by the target option. Only bm1880 is handled; any other value, including the bm1682 initial value set above, reaches llvm_unreachable. */ +Backend *SophonBackend::createBackend() { + if (target == Sophon::Target::BM1880) { + return new BM1880Backend(); + } + llvm_unreachable("unsupport target!"); +} + +} // namespace glow diff --git a/lib/Backends/Sophon/SophonBackend.h b/lib/Backends/Sophon/SophonBackend.h new file mode 100644 index 0000000000..aa6e473366 --- /dev/null +++ b/lib/Backends/Sophon/SophonBackend.h @@ -0,0 +1,62 @@ +/** + * Copyright (c) 2017-present, Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#ifndef GLOW_BACKENDS_Sophon_SophonBackend_H +#define GLOW_BACKENDS_Sophon_SophonBackend_H + +#include "AllocationsInfo.h" +#include "SophonTargetTransformInfo.h" + +#include "glow/Backends/Backend.h" +#include "glow/Backends/CompiledFunction.h" +#include "glow/Graph/Node.h" + +namespace glow { +/* Chip targets that the target command-line option can select. */ +namespace Sophon { +enum Target { + BM1682, + BM1880, + BM1882, +}; +} // namespace Sophon + +using SophonCmdBuf = std::vector; + +// Base class for all Sophon platforms; a concrete chip backend implements the pure virtuals below. +class SophonBackend : public BackendUsingGlowIR { +public: + SophonBackend() = default; + + ~SophonBackend() override = default; + + static Backend *createBackend(); + + virtual uint32_t getTarget() const = 0; // such as 1682/1880/... + + virtual void generateWeights(IRFunction *F, AllocationsInfo &allocationsInfo, + std::vector &weights) const = 0; + virtual void codeGenCmdbuf(IRFunction *F, AllocationsInfo &allocationsInfo, + SophonCmdBuf &cmdbuf) const = 0; + + // Feature queries; getTTI() returns nullptr unless a subclass overrides it. + virtual bool hasFP32Inst() const = 0; + virtual bool hasInt8Inst() const = 0; + virtual sophon::SophonTargetTransformInfo *getTTI() const { return nullptr; } +}; + +} // namespace glow + +#endif // GLOW_BACKENDS_Sophon_SophonBackend_H diff --git a/lib/Backends/Sophon/SophonFunction.cpp b/lib/Backends/Sophon/SophonFunction.cpp new file mode 100644 index 0000000000..3adb5ebd01 --- /dev/null +++ b/lib/Backends/Sophon/SophonFunction.cpp @@ -0,0 +1,114 @@ +/* + * bmnet/lib/Backends/Sophon/SophonFunction.cpp + * + * Copyright Bitmain Technologies Inc.
+ * Written by: + * Wanwei CAI + * Created Time: 2018-10-15 10:03 + */ + +#define DEBUG_TYPE "sophon_func" + +#include "SophonFunction.h" +#include "CommandLine.h" +#include "glow/Base/Tensor.h" +#include "glow/Graph/Context.h" +#include "glow/Support/Debug.h" +#include "llvm/Support/Debug.h" + +#include +#include +#include + +namespace glow { +/* Takes ownership of the compiled model. */ +SophonFunction::SophonFunction(std::unique_ptr model) { + model_ = std::move(model); +} + +SophonFunction::~SophonFunction() {} +/* Runs the model: initialises a context with bm_init(0, ...), saves the bmodel into a fresh temporary directory, registers it, uploads the input, runs, downloads the output and copies it into the output placeholders. Every failure ends in llvm_unreachable. */ +void SophonFunction::execute(Context &ctx) { + DEBUG_GLOW(bmodel::print(*model_)); + + bmctx_t bmctx; + bmerr_t ret; + ret = bm_init(0, &bmctx); + if (ret != BM_SUCCESS) { + llvm_unreachable("bm_init failed"); + } + bmnet_t net; + bmnet_output_info_t output_info; +/* NOTE(review): mkdtemp() returns a null pointer on failure, which is not checked before building the std::string; the directory and file are also never removed in this function. */ + auto bmodel_filename = []() { + char temp[] = "/tmp/glow-ut-temp.XXXXXX"; + return std::string(mkdtemp(temp)) + "/test.bmodel"; + }(); + + ret = bmodel::save(*model_, bmodel_filename); + if (ret != BM_SUCCESS) { + llvm_unreachable("save bmodel failed"); + } + + ret = bmnet_register_bmodel(bmctx, bmodel_filename.c_str(), &net); +#if 0 // bmnet_register_bmodel_data has bug + std::string bmodel_json = bmodel_->json_dump(); + ret = bmnet_register_bmodel_data( + bmctx, reinterpret_cast(const_cast(bmodel_json.data())), + bmodel_json.size(), &net); +#endif + if (ret != BM_SUCCESS) { + llvm_unreachable("register failed"); + } + ret = bmnet_get_output_info(net, &output_info); + if (ret != BM_SUCCESS) { + llvm_unreachable("get output failed!"); + } + + // TODO support multiple inputs (for now every matching placeholder overwrites the same buffer) + std::vector input; + + for (auto PH : ctx.pairs()) { + // A placeholder whose name does not contain "save_" is treated as the input. + // NOTE(review): tensor->size() is used as the byte count for memcpy; presumably only right for 1-byte element types - confirm. + if (PH.first->getName().find("save_") == llvm::StringRef::npos) { + DEBUG_GLOW(llvm::dbgs() << "input name is: " << PH.first->getName()); + auto *tensor = PH.second; + input.resize(tensor->size()); + memcpy(input.data(), PH.second->getUnsafePtr(), tensor->size()); + } + } + + // upload input data + ret = bmnet_load_input(net, input.data()); + if (ret != BM_SUCCESS)
{ + llvm_unreachable("load input failed!"); + } + + // run cmdbuf + ret = bmnet_run(net); + if (ret != BM_SUCCESS) { + llvm_unreachable("run failed!"); + } + + size_t output_size = output_info.output_size; + std::vector output(output_size); + // download output data + ret = bmnet_store_output(net, output.data()); + if (ret != BM_SUCCESS) { + llvm_unreachable("store output failed!"); + } + + bmnet_cleanup(net); + bm_exit(bmctx); + + // TODO support multiple outputs (for now every matching placeholder receives the same output_size bytes) + for (auto PH : ctx.pairs()) { + // Sophon Backend uses "save_" prefix to recognize output + // NOTE(review): output_size is not compared with the size of the destination tensor. + if (PH.first->getName().find("save_") != llvm::StringRef::npos) { + DEBUG_GLOW(llvm::dbgs() << "output name is: " << PH.first->getName()); + memcpy(PH.second->getUnsafePtr(), output.data(), output_size); + } + } +} + +} // namespace glow diff --git a/lib/Backends/Sophon/SophonFunction.h b/lib/Backends/Sophon/SophonFunction.h new file mode 100644 index 0000000000..d6e92fff7a --- /dev/null +++ b/lib/Backends/Sophon/SophonFunction.h @@ -0,0 +1,39 @@ +/* + * bmnet/lib/Backends/Sophon/SophonFunction.h + * + * Copyright Bitmain Technologies Inc. + * Written by: + * Wanwei CAI + * Created Time: 2018-10-15 09:58 + */ + +#ifndef _SophonFUNCTION_H +#define _SophonFUNCTION_H + +#include "glow/Backends/CompiledFunction.h" + +#include +#include + +namespace glow { + +/// A Glow IR function compiled for Sophon. +class SophonFunction final : public CompiledFunction { + +public: + /// Ctor. Takes ownership of \p model. + explicit SophonFunction(std::unique_ptr model); + + /// @name CompiledFunction interface + ///@{ + ~SophonFunction() override; + + void execute(Context &ctx) override; + +private: + std::unique_ptr model_; +}; + +} // namespace glow + +#endif diff --git a/lib/Backends/Sophon/SophonQuantizer.cpp b/lib/Backends/Sophon/SophonQuantizer.cpp new file mode 100644 index 0000000000..c75b7d5932 --- /dev/null +++ b/lib/Backends/Sophon/SophonQuantizer.cpp @@ -0,0 +1,190 @@ +/* + * Copyright (C) Bitmain Technologies Inc. + * All Rights Reserved.
+ */ + +#define DEBUG_TYPE "sophon_quantizer" + +#include "glow/Support/Debug.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include + +#include "SophonQuantizer.h" + +using llvm::dyn_cast; + +namespace glow { +/* Returns the first layer whose own name, or one of whose blob_param names, equals name; nullptr when nothing matches. */ +const LayerCalibrationParameter * +SophonQuantizer::getLayerCalibrationParameter(const std::string &name) { + for (int i = 0; i < netCalibrationParameter_.layer_size(); i++) { + const LayerCalibrationParameter &layer = netCalibrationParameter_.layer(i); + if (layer.name() == name) { + return &layer; + } + for (int j = 0; j < layer.blob_param_size(); j++) { + if (layer.blob_param(j).name() == name) { + return &layer; + } + } + } + return nullptr; +} +/* Float outputs get an Int8QTy type with the same dims, built with (1, 0) as the trailing uniqueType arguments; any other element kind returns nullptr. */ +TypeRef SophonQuantizer::getTargetTypeForOutput(const NodeValue &out) const { + if (out.getElementType() != ElemKind::FloatTy) { + return nullptr; + } + // TODO: Deal with Int16QTy output case in FC. + return mod_.uniqueType(ElemKind::Int8QTy, out.dims(), 1, 0); +} +/* Type for input idx of use: nullptr for non-float inputs, Int16QTy for input 2 of a Sophon convolution or a fully connected node, Int8QTy otherwise. NOTE(review): convN below is never used. */ +TypeRef SophonQuantizer::getTargetTypeForInput(const Node &use, + unsigned idx) const { + NodeValue val = use.getNthInput(idx); + + // Do not quantize non floating point type, e.g., Index type. + if (val.getElementType() != ElemKind::FloatTy) { + return nullptr; + } + + // For bias of a conv/fc op, it is quantized to int16. + if (use.getKind() == glow::Kinded::Kind::SophonConvolutionNodeKind && + idx == 2) { + auto convN = llvm::dyn_cast(&use); + return mod_.uniqueType(ElemKind::Int16QTy, val.dims(), 1, 0); + } else if (use.getKind() == glow::Kinded::Kind::FullyConnectedNodeKind && + idx == 2) { + return mod_.uniqueType(ElemKind::Int16QTy, val.dims(), 1, 0); + } else { + return mod_.uniqueType(ElemKind::Int8QTy, val.dims(), 1, 0); + } +} +/* A node is convertible when the engine reports Int8QTy support for its kind and every input is float. */ +bool SophonQuantizer::canConvert(const Node &node) const { + auto kind = node.getKind(); + + if (!EE_.isOpSupported(kind, ElemKind::Int8QTy)) { + return false; + } + + // Make sure that all inputs are floats.
+ for (unsigned i = 0, e = node.getNumInputs(); i < e; ++i) { + if (node.getNthInput(i).getElementType() != ElemKind::FloatTy) { + return false; + } + } + + return true; +} +/* Creates a quantize node when destTy is quantized, otherwise a dequantize node. NOTE(review): the quantize path goes through the member function_ while the dequantize path uses the function parameter; confirm both are meant to be the same function. */ +Node *SophonQuantizer::createConversion(Function &function, NodeValue &val, + TypeRef destTy) { + if (destTy->isQuantizedType()) { + assert((destTy->getElementType() == ElemKind::Int8QTy || + destTy->getElementType() == ElemKind::Int16QTy || + destTy->getElementType() == ElemKind::Int32QTy) && + "We only support int8_t int16_t and int32_t quantization now"); + return function_.createQuantize("quantize", val, destTy); + } + + assert(destTy->getElementType() == ElemKind::FloatTy && ""); + return function.createDequantize("dequantize", val); +} + +/// Replace convolution, fully connected, relu and max-pool nodes with SophonQ8 nodes; any other node is returned unchanged. +Node &SophonQuantizer::morphNode(Node &node) { + Node *quantizedNode{}; + // Some nodes are glow-HIR and some are sophon-HIR. Depends on loader. + if (auto *Conv = dyn_cast(&node)) { + auto QT = + mod_.uniqueType(ElemKind::Int8QTy, Conv->getResult().dims(), 1, 0); + const auto *calibration_parameter = + getLayerCalibrationParameter(Conv->getName()); + GLOW_ASSERT(calibration_parameter); + int right_shift_width = calibration_parameter->right_shift_width(); + + quantizedNode = function_.addNode(new SophonConvolutionQ8Node( + Conv->getName(), QT, node.getNthInput(0), node.getNthInput(1), + node.getNthInput(2), {Conv->getStrides()[0], Conv->getStrides()[1]}, + {Conv->getPads()[0], Conv->getPads()[1], Conv->getPads()[2], + Conv->getPads()[3]}, + {1, 1}, // default DilationHW is 1,1 + right_shift_width, + false // EnableRelu + )); + } else if (auto *FC = dyn_cast(&node)) { + auto QT = mod_.uniqueType(ElemKind::Int8QTy, FC->getResult().dims(), 1, 0); + const auto *calibration_parameter = + getLayerCalibrationParameter(FC->getName()); + GLOW_ASSERT(calibration_parameter); + int right_shift_width = calibration_parameter->right_shift_width(); + quantizedNode = function_.addNode( + new
SophonFullyConnectedQ8Node(FC->getName(), QT, node.getNthInput(0), + node.getNthInput(1), node.getNthInput(2), + false, // Relu + right_shift_width, 0, false)); + } else if (auto *Relu = dyn_cast(&node)) { + auto QT = + mod_.uniqueType(ElemKind::Int8QTy, Relu->getResult().dims(), 1, 0); + quantizedNode = function_.addNode( + new SophonReluQ8Node(Relu->getName(), QT, node.getNthInput(0))); + } else if (auto *Maxpool = dyn_cast(&node)) { + auto QT = + mod_.uniqueType(ElemKind::Int8QTy, Maxpool->getResult().dims(), 1, 0); + const auto *calibration_parameter = + getLayerCalibrationParameter(Maxpool->getName()); + GLOW_ASSERT(calibration_parameter); + int right_shift_width = calibration_parameter->right_shift_width(); + /* NOTE(review): element 0 is read below without checking that threshold_x_quantized is non-empty. */const int *threshold_x_quantized = + calibration_parameter->threshold_x_quantized().data(); + quantizedNode = function_.addNode(new SophonMaxPoolQ8Node( + Maxpool->getName(), QT, node.getNthInput(0), + {Maxpool->getKernels()[0], Maxpool->getKernels()[1]}, + {Maxpool->getStrides()[0], Maxpool->getStrides()[1]}, + {Maxpool->getPads()[0], Maxpool->getPads()[1], Maxpool->getPads()[2], + Maxpool->getPads()[3]}, + right_shift_width, threshold_x_quantized[0], Maxpool->getRoundMode())); + } +/* When a replacement was created, redirect every use of result 0 of the original node to it. */ + if (quantizedNode != nullptr) { + NodeValue(&node, 0).replaceAllUsesOfWith(quantizedNode); + return *quantizedNode; + } else { + return node; + } +} +/* Only asserts a float tensor and an Int8QTy destination; the tensor contents are left untouched. */ +void SophonQuantizer::convertTensor(Tensor &tensor, TypeRef destTy) { + assert(tensor.getElementType() == ElemKind::FloatTy && + destTy->getElementType() == ElemKind::Int8QTy && + "Dequantization not implemented"); + // Do nothing now.
+} + +} // namespace glow + +namespace glow { +namespace quantizesophon { +/* Reads a NetCalibrationParameter from the binary protobuf file filename and returns it by value. The outcome of ReadProtoFromBinaryFile is not checked, and DebugString() is built even when debug output is off. */ +const NetCalibrationParameter loadCtableFile(const std::string &filename) { + NetCalibrationParameter netCalibrationParameter; + bmnet::ReadProtoFromBinaryFile(filename, &netCalibrationParameter); + std::string ctableNameString = netCalibrationParameter.DebugString(); + DEBUG_GLOW(llvm::dbgs() << "ImportCalibrationTable: " + << "\n" + << ctableNameString << "\n"); + return netCalibrationParameter; +} +/* Loads the calibration table from filename and runs a SophonQuantizer over F. */ +void quantizeSophonGraph(const ExecutionEngine &EE, Function *F, + const std::string &filename) { + const NetCalibrationParameter netCalibrationParameter = + loadCtableFile(filename); + SophonQuantizer sq(EE, *F, netCalibrationParameter); + sq.convert(); +} + +} // namespace quantizesophon +} // namespace glow diff --git a/lib/Backends/Sophon/SophonQuantizer.h b/lib/Backends/Sophon/SophonQuantizer.h new file mode 100644 index 0000000000..51d008f532 --- /dev/null +++ b/lib/Backends/Sophon/SophonQuantizer.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) Bitmain Technologies Inc. + * All Rights Reserved. + */ + +#ifndef GLOW_SOPHON_QUANTIZATION_H +#define GLOW_SOPHON_QUANTIZATION_H + +#include + +#include "glow/Converter/FunctionConverter.h" +#include "glow/ExecutionEngine/ExecutionEngine.h" + +namespace glow { + +/// This class produces a quantized function based on a provided ctable. +class SophonQuantizer : public FunctionConverter { +protected: + TypeRef getTargetTypeForOutput(const NodeValue &out) const override; + + TypeRef getTargetTypeForInput(const Node &use, unsigned idx) const override; + + bool canConvert(const Node &node) const override; + + Node *createConversion(Function &function, NodeValue &val, + TypeRef destTy) override; + + Node &morphNode(Node &node) override; + + void convertTensor(Tensor &tensor, TypeRef destTy) override; + +private: + Module &mod_; + + const ExecutionEngine &EE_; + + /// Calibration table for low precision. Held by reference, not copied.
+ const NetCalibrationParameter &netCalibrationParameter_; + +public: + SophonQuantizer(const ExecutionEngine &EE, Function &F, + const NetCalibrationParameter &netCalibrationParameter) + : FunctionConverter(F), mod_(*F.getParent()), EE_(EE), + netCalibrationParameter_(netCalibrationParameter) {} + + const LayerCalibrationParameter * + getLayerCalibrationParameter(const std::string &name); +}; + +namespace quantizesophon { + +const NetCalibrationParameter loadCtableFile(const std::string &filename); +void quantizeSophonGraph(const ExecutionEngine &EE, Function *F, + const std::string &filename); + +} // namespace quantizesophon +} // namespace glow + +#endif // GLOW_SOPHON_QUANTIZATION_H diff --git a/lib/Backends/Sophon/SophonTargetTransformInfo.h b/lib/Backends/Sophon/SophonTargetTransformInfo.h new file mode 100644 index 0000000000..09b6596e6f --- /dev/null +++ b/lib/Backends/Sophon/SophonTargetTransformInfo.h @@ -0,0 +1,30 @@ +#ifndef SOPHON_TARGET_TRANSFORM_INFO_H +#define SOPHON_TARGET_TRANSFORM_INFO_H + +#include "glow/IR/IR.h" +#include "glow/IR/Instrs.h" +#include + +namespace glow { +namespace sophon { +/* Target description queries. Every method has a neutral default here: 0, true or an empty vector. */ +class SophonTargetTransformInfo { +public: + virtual ~SophonTargetTransformInfo() = default; // virtual so a derived object can be destroyed through a base pointer + virtual size_t getLMemSizeFromValue(glow::Value *value) const { return 0; } + virtual bool isEUAligned(const glow::AllocActivationInst *Inst) const { + return true; + } + virtual std::vector + getLMemSizeFromInst(glow::Instruction *Inst) const { + return std::vector(); + } + virtual int getLocalMemSizeInBytes() const { return 0; } + virtual int getTPUNum() const { return 0; } + virtual int getNPUNum() const { return 0; } + virtual int getEUNum() const { return 0; } +}; +} // namespace sophon + +} // namespace glow +#endif // SOPHON_TARGET_TRANSFORM_INFO_H diff --git a/lib/Backends/Sophon/Utility/CMakeLists.txt b/lib/Backends/Sophon/Utility/CMakeLists.txt new file mode 100644 index 0000000000..c011fbb726 --- /dev/null +++ b/lib/Backends/Sophon/Utility/CMakeLists.txt @@ -0,0 +1,3 @@ + +add_libbmnet_glow( +
memory.cpp) diff --git a/lib/Backends/Sophon/Utility/memory.cpp b/lib/Backends/Sophon/Utility/memory.cpp new file mode 100644 index 0000000000..a297aa0c8a --- /dev/null +++ b/lib/Backends/Sophon/Utility/memory.cpp @@ -0,0 +1,23 @@ +#include "memory.h" +namespace glow { +namespace sophon { +/* Integer division rounded up; the formula assumes a non-negative numerator and a positive denominator. */ +int idiv_round(int pNumerator, int pDenominator) { + return (pNumerator + pDenominator - 1) / pDenominator; +} +/* Rounds pNum up to a multiple of pAlign. The mask arithmetic is only exact when pAlign is a power of two. */ +int align(int pNum, int pAlign) { + int mask = pAlign - 1; + return (pNum + mask) & ~mask; +} +/* ceil(c / npu_num) * n * (h * w rounded up to eu_num). NOTE(review): the unsigned arguments pass through int helpers and the product is formed before widening to size_t. */size_t formula_4d_aligned(unsigned n, unsigned c, unsigned h, unsigned w, + unsigned npu_num, unsigned eu_num) { + return idiv_round(c, npu_num) * n * align((h * w), eu_num); +} +/* ceil(c / npu_num) * n * h * w, without rounding h * w up. */ +size_t formula_4d_nonaligned(unsigned n, unsigned c, unsigned h, unsigned w, + unsigned npu_num) { + return idiv_round(c, npu_num) * n * (h * w); +} +} // namespace sophon +} // namespace glow diff --git a/lib/Backends/Sophon/Utility/memory.h b/lib/Backends/Sophon/Utility/memory.h new file mode 100644 index 0000000000..e43e202d60 --- /dev/null +++ b/lib/Backends/Sophon/Utility/memory.h @@ -0,0 +1,74 @@ +#pragma once + +#include +#include + +namespace glow { +namespace sophon { + +int idiv_round(int pNumerator, int pDenominator); +int align(int pNum, int pAlign); +size_t formula_4d_aligned(unsigned n, unsigned c, unsigned h, unsigned w, + unsigned npu_num, unsigned eu_num); +size_t formula_4d_nonaligned(unsigned n, unsigned c, unsigned h, unsigned w, + unsigned npu_num); + +// Implicit W parameter Rule: +// This is based on bmkernel implementation. +// When W is not provided with 2D tensor, we use the algorithm to decide W.
+/* 4-D view of a 1-D bias of length dim: w is 32 when dim > 32 and 16 otherwise, n = 2, h = 1, c = ceil(dim / w). */struct SophonFCBiasDim { + unsigned n, c, h, w; + template SophonFCBiasDim(T &vec) { + n = c = h = w = 0; + assert(vec.size() == 1); + unsigned dim = vec[0]; + if (dim > 32) { + w = 32; + } else { + w = 16; + } + n = 2; + h = 1; + c = glow::sophon::idiv_round(dim, w); + } +}; +/* 4-D view of a shape of rank 4, 2 or 1. Rank 4 is taken as n, c, h, w. Rank 2 {M, N} gives n = M, h = 1, w = 32 when N > 32 else 16, c = ceil(N / w). Rank 1 gives n = 2, c = vec[0], h = w = 1. Any other rank only trips the assert, so with NDEBUG all four fields stay 0. */ +struct SophonDim { + unsigned n, c, h, w; + template SophonDim(const T &vec) { + n = c = h = w = 0; + size_t dim = vec.size(); + switch (dim) { + case 4: + n = vec[0]; + c = vec[1]; + h = vec[2]; + w = vec[3]; + break; + case 2: { + unsigned M = vec[0]; + unsigned N = vec[1]; + if (N > 32) { + w = 32; + } else { + w = 16; + } + n = M; + h = 1; + c = glow::sophon::idiv_round(N, w); + } break; + case 1: + n = 2; + c = vec[0]; + h = 1; + w = 1; + break; + default: + assert(false && "Dimension is not between 1, 2, or 4"); + break; + } + } +}; + +} // namespace sophon +} // namespace glow diff --git a/tests/unittests/CMakeLists.txt b/tests/unittests/CMakeLists.txt index dd0363b8fe..3366e30bc4 100755 --- a/tests/unittests/CMakeLists.txt +++ b/tests/unittests/CMakeLists.txt @@ -347,3 +347,7 @@ LIST(APPEND UNOPT_TESTS add_custom_target(test_unopt ${UNOPT_TESTS} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + +if(GLOW_WITH_SOPHON) + add_subdirectory(Sophon) +endif() diff --git a/tests/unittests/Sophon/Backends/BM1880CodeGenTest.cpp b/tests/unittests/Sophon/Backends/BM1880CodeGenTest.cpp new file mode 100644 index 0000000000..9b5ef22b11 --- /dev/null +++ b/tests/unittests/Sophon/Backends/BM1880CodeGenTest.cpp @@ -0,0 +1,525 @@ +#include "Backends/Sophon/BM188x/BM1880CodeGen.h" +#include "Backends/Sophon/BM188x/BM1880AllocationsInfo.h" +#include "Backends/Sophon/BM188x/BM1880Backend.h" +#include "Backends/Sophon/BM188x/BM1880TargetTransformInfo.h" +#include "glow/Base/Type.h" +#include "glow/ExecutionEngine/ExecutionEngine.h" +#include "glow/Graph/Context.h" +#include "glow/Graph/Graph.h" +#include "glow/IR/IRBuilder.h" + +#include "gtest/gtest.h" + +#include "llvm/ADT/STLExtras.h" +#include
"llvm/Support/Casting.h" + +using namespace glow; + +TEST(SophonLIRTest, HIRExample) { + // HIR example: builds Pow followed by Save, generates IR and dumps it; nothing is asserted. + Module mod; + Function *F = mod.createFunction("TestHIR"); + auto *X = mod.createPlaceholder(ElemKind::FloatTy, {1, 1, 3}, "X", false); + auto *Pow1 = F->createPow("Pow1", X, 2.0); + auto *save1 = F->createSave("save", Pow1); + auto IR = llvm::make_unique(F); + IR->generateIR(); + IR->dump(); +} + +TEST(BM1880CodeGenTest, LIRConv) { + // LIR example: hand-built load / convolution / store sequence that is passed to code generation; nothing is asserted. + Module mod; + Function *F = mod.createFunction("TestLIRConv"); + auto IR = llvm::make_unique(F); + IRBuilder bb(IR.get()); + + // N, C, H, W + auto *input = + bb.createWeightVar(glow::ElemKind::FloatTy, {1, 1, 3, 3}, "input", + WeightVar::MutabilityKind::Constant); + + // OC, IC, KH, KW + auto *conv_w = + bb.createWeightVar(glow::ElemKind::FloatTy, {3, 1, 3, 3}, "conv1.w", + WeightVar::MutabilityKind::Constant); + + // OC + auto *conv_b = bb.createWeightVar(glow::ElemKind::FloatTy, {3}, "conv1.b", + WeightVar::MutabilityKind::Constant); + + // N, C, H, W + auto *output = + bb.createWeightVar(glow::ElemKind::FloatTy, {1, 3, 3, 3}, "output1", + WeightVar::MutabilityKind::Mutable); + + auto *lmem_in = bb.createAllocActivationInst("lmem.in", input->getType()); + auto *lmem_weight = + bb.createAllocActivationInst("lmem.weight", conv_w->getType()); + auto *lmem_bias = + bb.createAllocActivationInst("lmem.bias", conv_b->getType()); + auto *lmem_out = bb.createAllocActivationInst("lmem.out", output->getType()); + + int rshift_width; + bool enable_relu = false; + int stream_id = 0; + int inst_id = 0; + + bool local_aligned = true; + bool local_not_aligned = false; + + std::vector depends; + rshift_width = 0; + // dma shape {1, 1, 3, 3} + // dma stride {9, 9, 3} + bb.createSophonMIGDMAGlobalToLocalInst("load.conv1.in", lmem_in, input, + {1, 1, 3, 3}, {9, 9, 3}, false, + local_aligned); + // dma shape {1, 3, 9, 1} + // dma stride {27, 9, 1} + bb.createSophonMIGDMAGlobalToLocalInst("load.conv1.weight", lmem_weight, + conv_w,
{1, 3, 9, 1}, {27, 9, 1}, true, + local_not_aligned); + + bb.createSophonMIConvolutionQ8Inst( + "conv1", lmem_out, lmem_in, lmem_weight, lmem_bias, {1, 1}, {0, 0, 0, 0}, + {1, 1}, rshift_width, enable_relu, stream_id, inst_id, depends); + + // dma shape {1, 3, 3, 3} + // dma stride {27, 9, 3} + bb.createSophonMIGDMALocalToGlobalInst("store.conv1.output", output, lmem_out, + {1, 3, 3, 3}, {27, 9, 3}, false, + local_aligned); + + bb.createDeallocActivationInst("dealloc1", lmem_in); + bb.createDeallocActivationInst("dealloc2", lmem_weight); + bb.createDeallocActivationInst("dealloc3", lmem_bias); + bb.createDeallocActivationInst("dealloc4", lmem_out); + + IR->dump(); + BM1880AllocationsInfo allocInfo; + allocInfo.getAllocatedAddress()[input]; + allocInfo.getAllocatedAddress()[conv_w]; + allocInfo.getAllocatedAddress()[conv_b]; + allocInfo.getAllocatedAddress()[output]; + allocInfo.getAllocatedAddress()[lmem_in]; + allocInfo.getAllocatedAddress()[lmem_weight]; + allocInfo.getAllocatedAddress()[lmem_bias]; + allocInfo.getAllocatedAddress()[lmem_out]; + + std::unique_ptr codegen = + BM1880CodeGen::createCodeGen(IR.get(), allocInfo); + codegen->performCodeGen(); +} + +TEST(BM1880CodeGenTest, GenWeights) { + Module mod; + Function *F = mod.createFunction("TestGenWeights"); + auto IR = llvm::make_unique(F); + + auto *conv1_w_var = mod.createConstant(glow::ElemKind::Int8QTy, {1, 1, 1, 1}, + 1, 0, "conv1.w"); + auto *conv1_b_var = + mod.createConstant(glow::ElemKind::Int16QTy, {1}, 1, 0, "conv1.b"); + auto *conv2_w_var = mod.createConstant(glow::ElemKind::Int8QTy, {1, 1, 1, 1}, + 1, 0, "conv2.w"); + auto *conv2_b_var = + mod.createConstant(glow::ElemKind::Int16QTy, {1}, 1, 0, "conv2.b"); + + conv1_w_var->getHandle() = {1}; + conv1_b_var->getHandle() = {2}; + conv2_w_var->getHandle() = {3}; + conv2_b_var->getHandle() = {4}; + + BM1880AllocationsInfo allocInfo; + + auto &mem_lut = allocInfo.getAllocatedAddress(); + { + IRBuilder bb(IR.get()); + + auto *conv1_w = + 
bb.createWeightVar(glow::ElemKind::Int8QTy, {1, 1, 1, 1}, 1, 0, + "conv1.w", WeightVar::MutabilityKind::Constant); + + auto *conv1_b = + bb.createWeightVar(glow::ElemKind::Int16QTy, {1}, 1, 0, "conv1.b", + WeightVar::MutabilityKind::Constant); + + auto *conv2_w = + bb.createWeightVar(glow::ElemKind::Int8QTy, {1, 1, 1, 1}, 1, 0, + "conv2.w", WeightVar::MutabilityKind::Constant); + + auto *conv2_b = + bb.createWeightVar(glow::ElemKind::Int16QTy, {1}, 1, 0, "conv2.b", + WeightVar::MutabilityKind::Constant); + + IR->getVariableMap()[conv1_w_var] = conv1_w; + IR->getVariableMap()[conv1_b_var] = conv1_b; + IR->getVariableMap()[conv2_w_var] = conv2_w; + IR->getVariableMap()[conv2_b_var] = conv2_b; + + // The insert order is: + // conv1_w | conv1_b | conv2_w | conv2_b + // but the mem_allocator might want: + // conv2_w | conv2_b | conv1_w | conv1_b + mem_lut[conv1_w] = 0x3; + mem_lut[conv1_b] = 0x4; + mem_lut[conv2_w] = 0x0; + mem_lut[conv2_b] = 0x1; + } + std::unique_ptr backend(new BM1880Backend()); + std::vector u8_weights; + // Gen weights base on mem_lut + backend->generateWeights(IR.get(), allocInfo, u8_weights); + + // Check weight size + EXPECT_EQ(6, u8_weights.size()); + + // Check weight data + EXPECT_EQ(3, u8_weights[0]); + EXPECT_EQ(4, u8_weights[1]); + EXPECT_EQ(1, u8_weights[3]); + EXPECT_EQ(2, u8_weights[4]); +} + +TEST(BM1880CodeGenTest, LIRConvCompileRun) { + Module mod; + Function *F = mod.createFunction("TestLIRConv"); + auto IR = llvm::make_unique(F); + + // Because all importers will add "save_" prefix for input placeholder + // currently Sophon Backend uses this prefix to recognize input/output + auto *in_var = mod.createPlaceholder(glow::ElemKind::Int8QTy, {1, 1, 3, 3}, 1, + 0, "input", false); + auto *out_var = mod.createPlaceholder(glow::ElemKind::Int8QTy, {1, 3, 3, 3}, + 1, 0, "save_output", false); + auto *conv_w_var = mod.createConstant(glow::ElemKind::Int8QTy, {3, 1, 3, 3}, + 1, 0, "conv1.w"); + auto *conv_b_var = + 
mod.createConstant(glow::ElemKind::Int16QTy, {3}, 1, 0, "conv1.b"); + Context ctx; + auto *inputTensor = ctx.allocate(in_var); + inputTensor->zero(); + auto input_handle = inputTensor->getHandle(); + input_handle = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + input_handle.dump(); + + // clang-format off + conv_w_var->getHandle() = { + 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, 0, 0, 0 + }; + conv_b_var->getHandle() = {0, 1, 2}; + // clang-format on + + conv_w_var->getHandle().dump(); + conv_b_var->getHandle().dump(); + auto *outputTensor = ctx.allocate(out_var); + // Hand-coded Memory Allocation + BM1880AllocationsInfo allocInfo; + auto &mem_lut = + allocInfo.getAllocatedAddress(); // Memory Look-up Table for Codegen + std::unique_ptr backend(new BM1880Backend()); + auto *TTI = backend->getTTI(); + { + IRBuilder bb(IR.get()); + + // N, C, H, W + auto *input = + bb.createWeightVar(glow::ElemKind::Int8QTy, {1, 1, 3, 3}, 1, 0, "input", + WeightVar::MutabilityKind::Mutable); + + // IC, OC, KH, KW + auto *conv_w = + bb.createWeightVar(glow::ElemKind::Int8QTy, {3, 1, 3, 3}, 1, 0, + "conv1.w", WeightVar::MutabilityKind::Constant); + + // OC + auto *conv_b = + bb.createWeightVar(glow::ElemKind::Int16QTy, {3}, 1, 0, "conv1.b", + WeightVar::MutabilityKind::Constant); + + // N, C, H, W + auto *output = + bb.createWeightVar(glow::ElemKind::Int8QTy, {1, 3, 3, 3}, 1, 0, + "output1", WeightVar::MutabilityKind::Mutable); + IR->getVariableMap()[in_var] = input; + IR->getVariableMap()[out_var] = output; + IR->getVariableMap()[conv_w_var] = conv_w; + IR->getVariableMap()[conv_b_var] = conv_b; + + auto *lmem_in = bb.createAllocActivationInst("lmem.in", input->getType()); + auto *lmem_weight = + bb.createAllocActivationInst("lmem.weight", conv_w->getType()); + auto *lmem_bias = + bb.createAllocActivationInst("lmem.bias", conv_b->getType()); + auto *lmem_out = + bb.createAllocActivationInst("lmem.out", output->getType()); + + bool enable_relu = false; + int stream_id 
= 0; + int inst_id = 0; + + bool local_aligned = true; + bool local_not_aligned = false; + + std::vector depends; + int rshift_width = 0; + + // Input + // dma shape {1, 1, 3, 3} + // dma stride {9, 9, 3} + bb.createSophonMIGDMAGlobalToLocalInst("load.conv1.in", lmem_in, input, + {1, 1, 3, 3}, {9, 9, 3}, false, + local_aligned); + + // Weight + // dma shape {1, 3, 9, 1} + // dma stride {27, 9, 1} + bb.createSophonMIGDMAGlobalToLocalInst("load.conv1.weight", lmem_weight, + conv_w, {1, 3, 9, 1}, {27, 9, 1}, + true, local_not_aligned); + + // Bias + // dma shape {2, 3, 1, 1} + // dma stride {3, 1, 1} + bb.createSophonMIGDMAGlobalToLocalInst("load.conv1.bias", lmem_bias, conv_b, + {2, 3, 1, 1}, {3, 1, 1}, true, + local_not_aligned); + + bb.createSophonMIConvolutionQ8Inst("conv1", lmem_out, lmem_in, lmem_weight, + lmem_bias, {1, 1}, {1, 1, 1, 1}, {1, 1}, + rshift_width, enable_relu, stream_id, + inst_id, depends); + + // Output + // dma shape {1, 3, 3, 3} + // dma stride {27, 9, 3} + bb.createSophonMIGDMALocalToGlobalInst("store.conv1.output", output, + lmem_out, {1, 3, 3, 3}, {27, 9, 3}, + false, local_aligned); + + bb.createDeallocActivationInst("dealloc1", lmem_in); + bb.createDeallocActivationInst("dealloc2", lmem_weight); + bb.createDeallocActivationInst("dealloc3", lmem_bias); + bb.createDeallocActivationInst("dealloc4", lmem_out); + + // Global Memory Allocation + // For Neuron + mem_lut[input] = 0x0; + mem_lut[output] = 0x10; + // For Weight + mem_lut[conv_w] = 0x0; + mem_lut[conv_b] = 27; + allocInfo.setActivationsMemSize(48); + + // Local Memory Allocation + // Rule: allocate aligned tensors, then non-aligned tensors + // Order: lmem_in, lmem_out, lmem_weight, lmem_bias + mem_lut[lmem_in] = 0x0; + mem_lut[lmem_out] = mem_lut[lmem_in] + TTI->getLMemSizeFromValue(lmem_in); + mem_lut[lmem_weight] = + mem_lut[lmem_out] + TTI->getLMemSizeFromValue(lmem_out); + mem_lut[lmem_bias] = + mem_lut[lmem_weight] + TTI->getLMemSizeFromValue(lmem_weight); + } + + 
backend->reorderWeights(IR.get()); + backend->codegen(std::move(IR), &allocInfo)->execute(ctx); + + auto H = outputTensor->getHandle(); + EXPECT_EQ(H.at({0, 0, 0, 0}), 1); + EXPECT_EQ(H.at({0, 0, 0, 1}), 2); + EXPECT_EQ(H.at({0, 0, 0, 2}), 3); + EXPECT_EQ(H.at({0, 0, 1, 0}), 4); + EXPECT_EQ(H.at({0, 0, 1, 1}), 5); + EXPECT_EQ(H.at({0, 0, 1, 2}), 6); + EXPECT_EQ(H.at({0, 0, 2, 0}), 7); + EXPECT_EQ(H.at({0, 0, 2, 1}), 8); + EXPECT_EQ(H.at({0, 0, 2, 2}), 9); + + EXPECT_EQ(H.at({0, 1, 0, 0}), 3); + EXPECT_EQ(H.at({0, 1, 0, 1}), 5); + EXPECT_EQ(H.at({0, 1, 0, 2}), 7); + EXPECT_EQ(H.at({0, 1, 1, 0}), 9); + EXPECT_EQ(H.at({0, 1, 1, 1}), 11); + EXPECT_EQ(H.at({0, 1, 1, 2}), 13); + EXPECT_EQ(H.at({0, 1, 2, 0}), 15); + EXPECT_EQ(H.at({0, 1, 2, 1}), 17); + EXPECT_EQ(H.at({0, 1, 2, 2}), 19); + + EXPECT_EQ(H.at({0, 2, 0, 0}), 5); + EXPECT_EQ(H.at({0, 2, 0, 1}), 8); + EXPECT_EQ(H.at({0, 2, 0, 2}), 11); + EXPECT_EQ(H.at({0, 2, 1, 0}), 14); + EXPECT_EQ(H.at({0, 2, 1, 1}), 17); + EXPECT_EQ(H.at({0, 2, 1, 2}), 20); + EXPECT_EQ(H.at({0, 2, 2, 0}), 23); + EXPECT_EQ(H.at({0, 2, 2, 1}), 26); + EXPECT_EQ(H.at({0, 2, 2, 2}), 29); +} + +TEST(BM1880CodeGenTest, LIRFCRun) { + Module mod; + Function *F = mod.createFunction("TestLIRFC"); + + // L Matrix = M x K + // R Matrix = K x N + // Y Matrix = M x N + // B = N + const unsigned M = 1; + const unsigned K = 1024; + const unsigned N = 1024; + + auto IR = llvm::make_unique(F); + auto *var_fc_l = mod.createPlaceholder(glow::ElemKind::Int8QTy, {M, K}, 1, 0, + "fc.l", false); + auto *var_fc_r = + mod.createConstant(glow::ElemKind::Int8QTy, {K, N}, 1, 0, "fc.r"); + auto *var_fc_b = + mod.createConstant(glow::ElemKind::Int16QTy, {N}, 1, 0, "fc.b"); + auto *var_fc_y = mod.createPlaceholder(glow::ElemKind::Int8QTy, {M, N}, 1, 0, + "save_fc_y", false); + + // Initialize Constant + + // K x N 8, 8, 8, ..., + // 8, 8, 8, ..., + // 8, 8, 8, ..., + // ... 
+ // -8, -8, -8, ..., + // -8, -8, -8, ..., + // -8, -8, -8, ..., + // + + for (unsigned i = 0; i < K; i++) { + for (unsigned j = 0; j < N; j++) { + int val; + if (i < (K / 2 + 1)) { + val = 8; + } else { + val = -8; + } + var_fc_r->getHandle().at({i, j}) = val; + } + } + + // { 2, 2, 2, 2, ..., 0, 0, 0} + for (unsigned i = 0; i < N; i++) { + int val; + if ((i / 32) % 2 == 0) { + val = 2; + } else { + val = 0; + } + var_fc_b->getHandle().at({i}) = val; + } + + Context ctx; + auto *ctx_fc_l = ctx.allocate(var_fc_l); + auto *ctx_fc_y = ctx.allocate(var_fc_y); + + // Initialize Placeholder + for (unsigned i = 0; i < M; i++) { + for (unsigned j = 0; j < K; j++) { + ctx_fc_l->getHandle().at({i, j}) = 1; + } + } + + IRBuilder bb(IR.get()); + + // <1, 1024> = <1, 32, 1, 32> + auto *mat_L = bb.createWeightVar(glow::ElemKind::Int8QTy, {M, K}, 1, 0, + "mat.L", WeightVar::MutabilityKind::Mutable); + // <1024, 1024> = <1024, 32, 1, 32> + auto *mat_R = + bb.createWeightVar(glow::ElemKind::Int8QTy, {K, N}, 1, 0, "mat.R", + WeightVar::MutabilityKind::Constant); + // <1024> = <2, 32, 1, 32> + auto *bias = bb.createWeightVar(glow::ElemKind::Int16QTy, {N}, 1, 0, "bias", + WeightVar::MutabilityKind::Constant); + // <1, 1024> = <1, 32, 1, 32> + auto *mat_Y = bb.createWeightVar(glow::ElemKind::Int8QTy, {M, N}, 1, 0, + "mat.Y", WeightVar::MutabilityKind::Mutable); + + IR->getVariableMap()[var_fc_l] = mat_L; + IR->getVariableMap()[var_fc_r] = mat_R; + IR->getVariableMap()[var_fc_b] = bias; + IR->getVariableMap()[var_fc_y] = mat_Y; + + auto *lmem_l = bb.createAllocActivationInst("lmem.L", mat_L->getType()); + auto *lmem_r = bb.createAllocActivationInst("lmem.R", mat_R->getType()); + auto *lmem_b = bb.createAllocActivationInst("lmem.B", bias->getType()); + auto *lmem_y = bb.createAllocActivationInst("lmem.Y", mat_Y->getType()); + + bool local_aligned = true; + bool local_not_aligned = false; + + // Matrix L + // dma shape { 1, 32, 1, 32} + // dma stride {32 * 32, 32, 32} + 
bb.createSophonMIGDMAGlobalToLocalInst("load.fc.l", lmem_l, mat_L, + {1, 32, 1, 32}, {128 * 32, 32, 32}, + false, local_aligned); + + // Matrix R + // dma shape { 1024, 32, 1, 32} + // dma stride {32 * 32, 32, 32} + bb.createSophonMIGDMAGlobalToLocalInst("load.fc.r", lmem_r, mat_R, + {1024, 32, 1, 32}, {32 * 32, 32, 32}, + true, local_aligned); + + // Bias + // dma shape { 2, 32, 1, 32} + // dma stride {32 * 32, 32, 32} + bb.createSophonMIGDMAGlobalToLocalInst("load.fc.b", lmem_b, bias, + {2, 32, 1, 32}, {32 * 32, 32, 32}, + true, local_not_aligned); + + unsigned rshift = 0; + unsigned lshift = 0; + bool result_add = false; + + bb.createSophonMIFCQ8Inst("FC1", lmem_y, lmem_l, lmem_r, lmem_b, rshift, + lshift, result_add); + + // Matrix Y + // dma shape { 1, 32, 1, 32} + // dma stride {32 * 32, 32, 32} + bb.createSophonMIGDMALocalToGlobalInst("store.fc.y", mat_Y, lmem_y, + {1, 32, 1, 32}, {32 * 32, 32, 32}, + false, local_aligned); + + BM1880AllocationsInfo allocInfo; + auto &mem_lut = allocInfo.getAllocatedAddress(); + + // Global Memory Allocation + // For Neuron + mem_lut[mat_L] = 0; // size: M x K + mem_lut[mat_Y] = M * K; // size: M x N + // For Weight + mem_lut[mat_R] = 0; // size: K x N + mem_lut[bias] = K * N; // size: 2 x N + + // Local Memory Allocation + mem_lut[lmem_l] = 0x0; // size: 1 x 32 + mem_lut[lmem_y] = 1 * 32; // size: 1 x 32 + mem_lut[lmem_r] = 2 * 32; // size: 1024 x 32 + mem_lut[lmem_b] = 2 * 32 + 1024 * 32; // size: 2 x 32 + + std::unique_ptr backend(new BM1880Backend()); + backend->reorderWeights(IR.get()); + backend->codegen(std::move(IR), &allocInfo)->execute(ctx); + + auto H = ctx_fc_y->getHandle(); + for (unsigned i = 0; i < N; i++) { + int ans; + if ((i / 32) % 2 == 0) { + ans = 18; + } else { + ans = 16; + } + EXPECT_EQ(H.at({0, i}), ans); + } +} diff --git a/tests/unittests/Sophon/Backends/BM1880DeleteQuantizeNodeTest.cpp b/tests/unittests/Sophon/Backends/BM1880DeleteQuantizeNodeTest.cpp new file mode 100644 index 
0000000000..8d969ff8b3 --- /dev/null +++ b/tests/unittests/Sophon/Backends/BM1880DeleteQuantizeNodeTest.cpp @@ -0,0 +1,49 @@ +#include "Backends/Sophon/BM188x/BM1880Backend.h" +#include "glow/Base/Type.h" +#include "glow/Graph/Context.h" +#include "glow/Graph/Graph.h" +#include "glow/Graph/Node.h" + +#include "gtest/gtest.h" + +using namespace glow; + +TEST(BM1880DeleteQuantizeNodeTest, DeleteQNode) { + Module mod; + Context ctx; + Function *F = mod.createFunction("main"); + auto *input = + mod.createPlaceholder(ElemKind::FloatTy, {1, 1, 3, 3}, "input", false); + auto *conv_w_var = mod.createConstant(glow::ElemKind::Int8QTy, {1, 3, 3, 3}, + 1, 0, "conv1.w"); + auto *conv_b_var = + mod.createConstant(glow::ElemKind::Int16QTy, {3}, 1, 0, "conv1.b"); + + auto *inputTensor = ctx.allocate(input); + // clang-format off + inputTensor->getHandle()= {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; + conv_w_var->getHandle() = { + 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 0, 0, 0, 0 + }; + conv_b_var->getHandle() = {0, 1, 2}; + // clang-format on + + TypeRef qty = mod.uniqueType(glow::ElemKind::Int8QTy, {1, 1, 3, 3}, 1, 0); + auto *QN = F->createQuantize("Quan", input, qty); + TypeRef ty = mod.uniqueType(glow::ElemKind::Int8QTy, {1, 3, 3, 3}, 1, 0); + auto *conv = F->addNode( + new SophonConvolutionQ8Node("conv", ty, QN, conv_w_var, conv_b_var, + {1, 1}, {1, 1, 1, 1}, {1, 1}, 0, false)); + auto *DN = F->createDequantize("DQuan", conv); + auto *SN = F->createSave("save", DN); + auto *savePlaceholder = SN->getPlaceholder(); + savePlaceholder->setName("save_output"); + auto *outputTensor = ctx.allocate(savePlaceholder); + + EXPECT_EQ(4, F->getNodes().size()); + std::unique_ptr backend(new BM1880Backend()); + backend->transformPreLowering(F, CompilationMode::Infer); + EXPECT_EQ(2, F->getNodes().size()); +} diff --git a/tests/unittests/Sophon/Backends/BM1880Expand.cpp b/tests/unittests/Sophon/Backends/BM1880Expand.cpp new file mode 100644 index 
0000000000..14f77151df --- /dev/null +++ b/tests/unittests/Sophon/Backends/BM1880Expand.cpp @@ -0,0 +1,445 @@ +#include "Backends/Sophon/BM188x/BM1880AllocationsInfo.h" +#include "Backends/Sophon/BM188x/BM1880Backend.h" +#include "Backends/Sophon/BM188x/BM1880ExpandSophonInst.h" +#include "Backends/Sophon/BM188x/BM1880InsertLoadStorePass.h" +#include "glow/Base/Type.h" +#include "glow/IR/IRBuilder.h" +#include "glow/IR/Instrs.h" +#include "glow/Optimizer/Optimizer.h" +#include "gtest/gtest.h" + +#include "llvm/Support/Casting.h" + +using namespace glow; + +TEST(BM1880ExpandTest, ExpandConvQ8) { + Module mod; + Function *F = mod.createFunction("TestLIRConv"); + auto IR = llvm::make_unique(F); + IRBuilder bb(IR.get()); + + // N, C, H, W + auto *input = bb.createWeightVar(glow::ElemKind::Int8QTy, {1, 1, 3, 3}, 1, 0, + "input", WeightVar::MutabilityKind::Mutable); + + // IC, OC, KH, KW + auto *conv_w = + bb.createWeightVar(glow::ElemKind::Int8QTy, {3, 1, 3, 3}, 1, 0, "conv1.w", + WeightVar::MutabilityKind::Constant); + + // OC + auto *conv_b = + bb.createWeightVar(glow::ElemKind::Int16QTy, {3}, 1, 0, "conv1.b", + WeightVar::MutabilityKind::Constant); + + // N, C, H, W + auto *output = + bb.createWeightVar(glow::ElemKind::Int8QTy, {1, 3, 1, 1}, 1, 0, "output1", + WeightVar::MutabilityKind::Mutable); + + auto *lmem_in = bb.createAllocActivationInst("lmem.in", input->getType()); + auto *lmem_weight = + bb.createAllocActivationInst("lmem.weight", conv_w->getType()); + auto *lmem_bias = + bb.createAllocActivationInst("lmem.bias", conv_b->getType()); + auto *lmem_out = bb.createAllocActivationInst("lmem.out", output->getType()); + + unsigned int pad_top, pad_left, pad_bottom, pad_right; + unsigned int dilation_h, dilation_w; + unsigned int stride_h, stride_w; + int rshift_width; + bool enable_relu = false; + std::vector depends; + + pad_top = pad_left = pad_bottom = pad_right = 0; + dilation_h = dilation_w = 1; + stride_h = stride_w = 1; + rshift_width = 0; + + 
bb.createSophonConvolutionQ8Inst( + "conv1", lmem_out, lmem_in, lmem_weight, lmem_bias, {stride_h, stride_w}, + {pad_top, pad_left, pad_bottom, pad_right}, {dilation_h, dilation_w}, + rshift_width, enable_relu); + + bb.createDeallocActivationInst("dealloc1", lmem_in); + bb.createDeallocActivationInst("dealloc2", lmem_weight); + bb.createDeallocActivationInst("dealloc3", lmem_bias); + bb.createDeallocActivationInst("dealloc4", lmem_out); + + BM1880AllocationsInfo tmp; + BM1880ExpandSophonInst(IR.get(), tmp).run(); + + auto cur_inst = IR->getInstrs().begin(); + std::advance(cur_inst, 4); + EXPECT_EQ(cur_inst->getKind(), + glow::Kinded::Kind::SophonMIConvolutionQ8InstKind); + auto *MI_inst = llvm::cast(cur_inst); + EXPECT_EQ(MI_inst->getDest()->getName(), lmem_out->getName()); + EXPECT_EQ(MI_inst->getSrc()->getName(), lmem_in->getName()); + EXPECT_EQ(MI_inst->getFilter()->getName(), lmem_weight->getName()); + EXPECT_EQ(MI_inst->getBias()->getName(), lmem_bias->getName()); + EXPECT_EQ(MI_inst->getStrideHW(), + (llvm::ArrayRef{stride_h, stride_w})); + EXPECT_EQ( + MI_inst->getPadTLBR(), + (llvm::ArrayRef{pad_top, pad_left, pad_bottom, pad_right})); + EXPECT_EQ(MI_inst->getDilationHW(), + (llvm::ArrayRef{dilation_h, dilation_w})); + EXPECT_EQ(MI_inst->getRShiftWidth(), rshift_width); + EXPECT_EQ(MI_inst->getEnableRelu(), enable_relu); +} + +TEST(BM1880ExpandTest, ExpandMaxPoolingQ8) { + Module mod; + Function *F = mod.createFunction("TestMaxPool"); + auto IR = llvm::make_unique(F); + IRBuilder bb(IR.get()); + + // N, C, H, W + auto *input = bb.createWeightVar(glow::ElemKind::Int8QTy, {1, 1, 4, 4}, 1, 0, + "input", WeightVar::MutabilityKind::Mutable); + + // N, C, H, W + auto *output = + bb.createWeightVar(glow::ElemKind::Int8QTy, {1, 1, 2, 2}, 1, 0, "output1", + WeightVar::MutabilityKind::Mutable); + + auto *lmem_in = bb.createAllocActivationInst("lmem.in", input->getType()); + auto *lmem_out = bb.createAllocActivationInst("lmem.out", output->getType()); + + uint32_t 
kernel_h, kernel_w; + uint32_t stride_h, stride_w; + uint32_t pad_top, pad_left, pad_bottom, pad_right; + uint32_t rshift_width; + uint32_t multiplier; + + kernel_h = kernel_w = 2; + stride_h = stride_w = 2; + pad_top = pad_left = pad_bottom = pad_right = 0; + rshift_width = 0; + multiplier = 3; + + bb.createSophonMaxPoolQ8Inst( + "pool", lmem_out, lmem_in, {kernel_h, kernel_w}, {stride_h, stride_w}, + {pad_top, pad_left, pad_bottom, pad_right}, rshift_width, multiplier); + + bb.createDeallocActivationInst("dealloc1", lmem_in); + bb.createDeallocActivationInst("dealloc4", lmem_out); + + BM1880AllocationsInfo tmp; + BM1880ExpandSophonInst(IR.get(), tmp).run(); + + auto cur_inst = IR->getInstrs().begin(); + std::advance(cur_inst, 2); + EXPECT_EQ(cur_inst->getKind(), + glow::Kinded::Kind::SophonMIMaxPoolingQ8InstKind); + auto *pool_inst = llvm::cast(cur_inst); + EXPECT_EQ(pool_inst->getDest()->getName(), lmem_out->getName()); + EXPECT_EQ(pool_inst->getSrc()->getName(), lmem_in->getName()); + EXPECT_EQ(pool_inst->getKernelHW(), + (llvm::ArrayRef{kernel_h, kernel_w})); + EXPECT_EQ(pool_inst->getStrideHW(), + (llvm::ArrayRef{stride_h, stride_w})); + EXPECT_EQ( + pool_inst->getPadTLBR(), + (llvm::ArrayRef{pad_top, pad_left, pad_bottom, pad_right})); + std::advance(cur_inst, 1); + EXPECT_EQ(cur_inst->getKind(), + glow::Kinded::Kind::SophonMIMulConstQ8InstKind); + auto *mul_inst = llvm::cast(cur_inst); + EXPECT_EQ(mul_inst->getDest()->getName(), lmem_out->getName()); + EXPECT_EQ(mul_inst->getSrc()->getName(), lmem_out->getName()); + EXPECT_EQ(mul_inst->getMultiplier(), multiplier); + EXPECT_EQ(mul_inst->getIsMultiplierSigned(), 0); + EXPECT_EQ(mul_inst->getRShiftWidth(), rshift_width); +} + +TEST(BM1880ExpandTest, ExpandReluQ8) { + Module mod; + Function *F = mod.createFunction("TestRelu"); + auto IR = llvm::make_unique(F); + IRBuilder bb(IR.get()); + + // N, C, H, W + auto *input = bb.createWeightVar(glow::ElemKind::Int8QTy, {1, 1, 4, 4}, 1, 0, + "input", 
WeightVar::MutabilityKind::Mutable); + + // N, C, H, W + auto *output = + bb.createWeightVar(glow::ElemKind::Int8QTy, {1, 1, 4, 4}, 1, 0, "output1", + WeightVar::MutabilityKind::Mutable); + + auto *lmem_in = bb.createAllocActivationInst("lmem.in", input->getType()); + auto *lmem_out = bb.createAllocActivationInst("lmem.out", output->getType()); + bb.createSophonReluQ8Inst("relu", lmem_out, lmem_in); + + bb.createDeallocActivationInst("dealloc1", lmem_in); + bb.createDeallocActivationInst("dealloc4", lmem_out); + + BM1880AllocationsInfo tmp; + BM1880ExpandSophonInst(IR.get(), tmp).run(); + + auto cur_inst = IR->getInstrs().begin(); + std::advance(cur_inst, 2); + EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIReluQ8InstKind); + auto *relu_inst = llvm::cast(cur_inst); + EXPECT_EQ(relu_inst->getDest()->getName(), lmem_out->getName()); + EXPECT_EQ(relu_inst->getSrc()->getName(), lmem_in->getName()); +} + +TEST(BM1880ExpandTest, ExpandFcQ8) { + Module mod; + Function *F = mod.createFunction("TestFc"); + auto IR = llvm::make_unique(F); + IRBuilder bb(IR.get()); + + // in_row * in_col + auto *input = bb.createWeightVar(glow::ElemKind::Int8QTy, {3, 4}, 1, 0, + "input", WeightVar::MutabilityKind::Mutable); + + // in_col * out_col + auto *weight = + bb.createWeightVar(glow::ElemKind::Int8QTy, {4, 2}, 1, 0, "weight", + WeightVar::MutabilityKind::Constant); + + // bias + auto *bias = bb.createWeightVar(glow::ElemKind::Int16QTy, {2}, 1, 0, "bias", + WeightVar::MutabilityKind::Constant); + // in_row * out_col + auto *output = + bb.createWeightVar(glow::ElemKind::Int8QTy, {3, 2}, 1, 0, "output1", + WeightVar::MutabilityKind::Mutable); + + auto *lmem_in = bb.createAllocActivationInst("lmem.in", input->getType()); + auto *lmem_weights = + bb.createAllocActivationInst("lmem.weights", weight->getType()); + auto *lmem_bias = bb.createAllocActivationInst("lmem.bias", bias->getType()); + auto *lmem_out = bb.createAllocActivationInst("lmem.out", output->getType()); + + bool 
relu = false; + uint32_t rshift_width = 5; + uint32_t lshift_width = 4; + bool result_add = false; + + bb.createSophonFullyConnectedQ8Inst("fc", lmem_out, lmem_in, lmem_weights, + lmem_bias, relu, rshift_width, + lshift_width, result_add); + + bb.createDeallocActivationInst("dealloc1", lmem_in); + bb.createDeallocActivationInst("dealloc2", lmem_weights); + bb.createDeallocActivationInst("dealloc3", lmem_bias); + bb.createDeallocActivationInst("dealloc4", lmem_out); + + BM1880AllocationsInfo tmp; + BM1880ExpandSophonInst(IR.get(), tmp).run(); + + auto cur_inst = IR->getInstrs().begin(); + std::advance(cur_inst, 4); + EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIFCQ8InstKind); + auto *fc_inst = llvm::cast(cur_inst); + EXPECT_EQ(fc_inst->getDest()->getName(), lmem_out->getName()); + EXPECT_EQ(fc_inst->getSrc()->getName(), lmem_in->getName()); + EXPECT_EQ(fc_inst->getFilter()->getName(), lmem_weights->getName()); + EXPECT_EQ(fc_inst->getBias()->getName(), lmem_bias->getName()); + EXPECT_EQ(fc_inst->getRShiftWidth(), rshift_width); + // backend hack lshift value as 3 + EXPECT_EQ(fc_inst->getLShiftWidth(), 3); + EXPECT_EQ(fc_inst->getResultAdd(), result_add); +} + +TEST(BM1880ExpandTest, ExpandFcReluQ8) { + Module mod; + Function *F = mod.createFunction("TestFc"); + auto IR = llvm::make_unique(F); + IRBuilder bb(IR.get()); + + // in_row * in_col + auto *input = bb.createWeightVar(glow::ElemKind::Int8QTy, {3, 4}, 1, 0, + "input", WeightVar::MutabilityKind::Mutable); + + // in_col * out_col + auto *weight = + bb.createWeightVar(glow::ElemKind::Int8QTy, {4, 2}, 1, 0, "weight", + WeightVar::MutabilityKind::Constant); + + // bias + auto *bias = bb.createWeightVar(glow::ElemKind::Int16QTy, {2}, 1, 0, "bias", + WeightVar::MutabilityKind::Constant); + // in_row * out_col + auto *output = + bb.createWeightVar(glow::ElemKind::Int8QTy, {3, 2}, 1, 0, "output1", + WeightVar::MutabilityKind::Mutable); + + auto *lmem_in = bb.createAllocActivationInst("lmem.in", 
input->getType()); + auto *lmem_weights = + bb.createAllocActivationInst("lmem.weights", weight->getType()); + auto *lmem_bias = bb.createAllocActivationInst("lmem.bias", bias->getType()); + auto *lmem_out = bb.createAllocActivationInst("lmem.out", output->getType()); + + bool relu = true; + uint32_t rshift_width = 5; + uint32_t lshift_width = 4; + bool result_add = false; + + bb.createSophonFullyConnectedQ8Inst("fc", lmem_out, lmem_in, lmem_weights, + lmem_bias, relu, rshift_width, + lshift_width, result_add); + + bb.createDeallocActivationInst("dealloc1", lmem_in); + bb.createDeallocActivationInst("dealloc2", lmem_weights); + bb.createDeallocActivationInst("dealloc3", lmem_bias); + bb.createDeallocActivationInst("dealloc4", lmem_out); + + BM1880AllocationsInfo tmp; + BM1880ExpandSophonInst(IR.get(), tmp).run(); + + auto cur_inst = IR->getInstrs().begin(); + std::advance(cur_inst, 4); + EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIFCQ8InstKind); + auto *fc_inst = llvm::cast(cur_inst); + EXPECT_EQ(fc_inst->getDest()->getName(), lmem_out->getName()); + EXPECT_EQ(fc_inst->getSrc()->getName(), lmem_in->getName()); + EXPECT_EQ(fc_inst->getFilter()->getName(), lmem_weights->getName()); + EXPECT_EQ(fc_inst->getBias()->getName(), lmem_bias->getName()); + EXPECT_EQ(fc_inst->getRShiftWidth(), rshift_width); + // backend hack lshift value as 3 + EXPECT_EQ(fc_inst->getLShiftWidth(), 3); + EXPECT_EQ(fc_inst->getResultAdd(), result_add); + std::advance(cur_inst, 1); + EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIReluQ8InstKind); +} + +TEST(BM1880ExpandTest, ExpandLoadQ8) { + Module mod; + Function *F = mod.createFunction("ExpandLoadStoreQ8"); + auto *input = mod.createPlaceholder(ElemKind::Int8QTy, {1, 1, 3, 3}, 1, 0, + "input", false); + auto *filter = + mod.createConstant(ElemKind::Int8QTy, {3, 1, 3, 3}, 1, 0, "filter"); + auto *bias = mod.createConstant(ElemKind::Int16QTy, {3}, 1, 0, "bias"); + auto output_type = mod.uniqueType(ElemKind::Int8QTy, 
{1, 3, 3, 3}, 1, 0); + + auto *conv = F->addNode( + new SophonConvolutionQ8Node("conv", output_type, input, filter, bias, + {1, 1}, {1, 1, 1, 1}, {1, 1}, 0, false)); + auto *result = F->createSave("ret2", conv); + result->getPlaceholder(); + + auto IR = llvm::make_unique(F); + IR->generateIR(); + glow::optimize(*IR, true); + sophon::runInsertLoadStorePass(IR.get()); + BM1880AllocationsInfo tmp; + BM1880ExpandSophonInst(IR.get(), tmp).run(); + auto cur_inst = IR->getInstrs().begin(); + std::advance(cur_inst, 2); + EXPECT_EQ(cur_inst->getKind(), + glow::Kinded::Kind::SophonMIGDMAGlobalToLocalInstKind); + auto *load_input = llvm::cast(cur_inst); + EXPECT_EQ(load_input->getShapeNCHW(), (llvm::ArrayRef{1, 1, 3, 3})); + EXPECT_EQ(load_input->getGlobalStrideNCH(), + (llvm::ArrayRef{1 * 3 * 3, 3 * 3, 3})); + EXPECT_EQ(load_input->getIsGlobalWeightSpace(), false); + EXPECT_EQ(load_input->getIsLocalAligned(), true); + + std::advance(cur_inst, 2); + EXPECT_EQ(cur_inst->getKind(), + glow::Kinded::Kind::SophonMIGDMAGlobalToLocalInstKind); + auto *load_weight = llvm::cast(cur_inst); + EXPECT_EQ(load_weight->getShapeNCHW(), + (llvm::ArrayRef{1, 3, 9, 1})); + EXPECT_EQ(load_weight->getGlobalStrideNCH(), + (llvm::ArrayRef{3 * 9, 9, 1})); + EXPECT_EQ(load_weight->getIsGlobalWeightSpace(), true); + EXPECT_EQ(load_weight->getIsLocalAligned(), false); + + std::advance(cur_inst, 2); + EXPECT_EQ(cur_inst->getKind(), + glow::Kinded::Kind::SophonMIGDMAGlobalToLocalInstKind); + auto *load_bias = llvm::cast(cur_inst); + EXPECT_EQ(load_bias->getShapeNCHW(), (llvm::ArrayRef{2, 3, 1, 1})); + EXPECT_EQ(load_bias->getGlobalStrideNCH(), + (llvm::ArrayRef{3 * 1 * 1, 1 * 1, 1})); + EXPECT_EQ(load_bias->getIsGlobalWeightSpace(), true); + EXPECT_EQ(load_bias->getIsLocalAligned(), false); + + std::advance(cur_inst, 2); + EXPECT_EQ(cur_inst->getKind(), + glow::Kinded::Kind::SophonMIGDMALocalToGlobalInstKind); + auto *store = llvm::cast(cur_inst); + EXPECT_EQ(store->getShapeNCHW(), (llvm::ArrayRef{1, 3, 
3, 3})); + EXPECT_EQ(store->getGlobalStrideNCH(), + (llvm::ArrayRef{3 * 3 * 3, 3 * 3, 3})); + EXPECT_EQ(store->getIsGlobalWeightSpace(), false); +} + +TEST(BM1880ExpandTest, ExpandLoadStoreFcQ8) { + Module mod; + Function *F = mod.createFunction("TestLIRFc"); + + auto *input = mod.createPlaceholder(glow::ElemKind::Int8QTy, {1, 800}, 1, 0, + "input", false); + + auto *fc_right = + mod.createConstant(glow::ElemKind::Int8QTy, {800, 500}, 1, 0, "fc_right"); + + // bias + auto *fc_b = + mod.createConstant(glow::ElemKind::Int16QTy, {500}, 1, 0, "fc.b"); + + TypeRef ty = mod.uniqueType(glow::ElemKind::Int8QTy, {1, 500}, 1, 0); + auto *fc = F->addNode(new SophonFullyConnectedQ8Node( + "fc", ty, input, fc_right, fc_b, false, 1, 2, false)); + + auto *result = F->createSave("ret2", fc); + + auto IR = llvm::make_unique(F); + IR->generateIR(); + glow::optimize(*IR, true); + sophon::runInsertLoadStorePass(IR.get()); + BM1880AllocationsInfo tmp; + BM1880ExpandSophonInst(IR.get(), tmp).run(); + // TODO check result + auto cur_inst = IR->getInstrs().begin(); + // load input + std::advance(cur_inst, 2); + EXPECT_EQ(cur_inst->getKind(), + glow::Kinded::Kind::SophonMIGDMAGlobalToLocalInstKind); + auto *load_input = llvm::cast(cur_inst); + EXPECT_EQ(load_input->getShapeNCHW(), + (llvm::ArrayRef{1, 25, 1, 32})); // 25=800/32 + EXPECT_EQ(load_input->getGlobalStrideNCH(), + (llvm::ArrayRef{25 * 1 * 32, 1 * 32, 32})); + EXPECT_EQ(load_input->getIsGlobalWeightSpace(), false); + EXPECT_EQ(load_input->getIsLocalAligned(), true); + + // loadr fc_right + std::advance(cur_inst, 2); + EXPECT_EQ(cur_inst->getKind(), + glow::Kinded::Kind::SophonMIGDMAGlobalToLocalInstKind); + auto *load_weight = llvm::cast(cur_inst); + EXPECT_EQ(load_weight->getShapeNCHW(), + (llvm::ArrayRef{800, 16, 1, 32})); // 16=500/32 + EXPECT_EQ(load_weight->getGlobalStrideNCH(), + (llvm::ArrayRef{500, 1 * 32, 32})); + EXPECT_EQ(load_weight->getIsGlobalWeightSpace(), true); + EXPECT_EQ(load_weight->getIsLocalAligned(), true); 
// End-to-end check of a 3-output-channel SophonConvolutionQ8 on a 1x1x3x3
// input: builds the graph, lowers it to IR, assigns global and local
// addresses by hand, runs reorderWeights / InsertLoadStore /
// ExpandSophonInst, then codegen + execute, and compares every output element.
//
// Each 3x3 filter below has a single non-zero center tap (1, 2, 3) and the
// biases are (0, 1, 2), so with stride 1 and padding 1 the three expected
// output channels are in, 2*in+1 and 3*in+2.
//
// NOTE(review): the suite name spells "Exapnd"; presumably "Expand" was
// intended. Renaming changes the gtest filter name, so it is left as is here.
TEST(BM1880ExapndCodeGenTest, ConvQ8Run) {
  Module mod;
  Function *F = mod.createFunction("TestConvQ8Run");

  // The save placeholder is renamed to "save_output" further down: per the
  // original note, importers add a "save_" prefix and the Sophon backend
  // currently relies on that prefix to recognize input/output placeholders.
  auto *in_var = mod.createPlaceholder(glow::ElemKind::Int8QTy, {1, 1, 3, 3}, 1,
                                       0, "input", false);
  auto *conv_w_var = mod.createConstant(glow::ElemKind::Int8QTy, {3, 1, 3, 3},
                                        1, 0, "conv1.w");
  auto *conv_b_var =
      mod.createConstant(glow::ElemKind::Int16QTy, {3}, 1, 0, "conv1.b");

  // Fill input and weights. The output tensor is allocated later, once the
  // save placeholder exists.
  Context ctx;
  auto *inputTensor = ctx.allocate(in_var);
  // clang-format off
  inputTensor->getHandle()= {1, 2, 3, 4, 5, 6, 7, 8, 9};
  conv_w_var->getHandle() = {
    0, 0, 0, 0, 1, 0, 0, 0, 0,
    0, 0, 0, 0, 2, 0, 0, 0, 0,
    0, 0, 0, 0, 3, 0, 0, 0, 0
  };
  conv_b_var->getHandle() = {0, 1, 2};
  // clang-format on

  DEBUG_GLOW(inputTensor->getHandle().dump());
  DEBUG_GLOW(conv_w_var->getHandle().dump());
  DEBUG_GLOW(conv_b_var->getHandle().dump());

  // create glow HIR: conv (stride 1x1, pad 1 on every side, dilation 1x1,
  // rshift 0, no relu) followed by a save node.
  TypeRef ty = mod.uniqueType(glow::ElemKind::Int8QTy, {1, 3, 3, 3}, 1, 0);
  auto *conv = F->addNode(
      new SophonConvolutionQ8Node("conv", ty, in_var, conv_w_var, conv_b_var,
                                  {1, 1}, {1, 1, 1, 1}, {1, 1}, 0, false));
  auto *save = F->createSave("save", conv);
  auto *savePlaceholder = save->getPlaceholder();
  savePlaceholder->setName("save_output");
  auto *outputTensor = ctx.allocate(savePlaceholder);

  // create glow LIR
  auto IR = llvm::make_unique(F);
  IR->generateIR();
  glow::optimize(*IR, true);

  // Hand-coded global memory allocation.
  BM1880AllocationsInfo allocInfo;
  auto &mem_lut =
      allocInfo.getAllocatedAddress(); // Memory Look-up Table for Codegen
  // For Global Neuron
  // NOTE(review): input and save_output are both placed at offset 0 —
  // presumably harmless because the input is loaded before the result is
  // stored, but confirm the overlap is intentional.
  mem_lut[IR->getVariableMap()[in_var]] = 0x0;
  mem_lut[IR->getVariableMap()[savePlaceholder]] = 0x0;
  // For Global Weight: the 27 filter values first, the bias right after them.
  mem_lut[IR->getVariableMap()[conv_w_var]] = 0x0;
  mem_lut[IR->getVariableMap()[conv_b_var]] = 27;

  // run backend flow
  std::unique_ptr backend(new BM1880Backend());
  backend->reorderWeights(IR.get());
  sophon::runInsertLoadStorePass(IR.get());
  BM1880ExpandSophonInst(IR.get(), allocInfo).run();

  // NOTE(review): the listing below uses the names of the FC test (LIRFC in
  // this file) and was apparently copied from it. Only the instruction
  // positions matter here: the code below treats instructions 0, 1, 3 and 5
  // as the output, input, filter and bias allocations.
  // 0 %fc_save_output = allocactivation
  // 1 %fc_fc_l = allocactivation
  // 2 %fc_fc_l_load1 = sophonmigdmaglobaltolocal
  // 3 %fc_fc_r = allocactivation
  // 4 %fc_fc_r_load1 = sophonmigdmaglobaltolocal
  // 5 %fc_fc_b = allocactivation
  // 6 %fc_fc_b_load1 = sophonmigdmaglobaltolocal
  // 7 %fc = sophonmifcq8
  // 8 %fc_save_output_store1 = sophonmigdmalocaltoglobal
  // 9 %dealloc1 = deallocactivation
  // 10 %dealloc2 = deallocactivation
  // 11 %dealloc3 = deallocactivation
  // 12 %dealloc4 = deallocactivation

  DEBUG_GLOW(IR->dump());

  // Hand-coded local memory offsets.
  // NOTE(review): the "size" remarks were carried over from the FC test and
  // do not describe this 3x3 convolution — TODO update.
  // output
  mem_lut[llvm::cast(Inst(IR, 0))] = 0; // size: 1 x 32
  // input
  mem_lut[llvm::cast(Inst(IR, 1))] = 1 * 32; // size: 1 x 32
  // filter
  mem_lut[llvm::cast(Inst(IR, 3))] = 2 * 32; // size: 1024 x 32
  // bias
  mem_lut[llvm::cast(Inst(IR, 5))] = 2 * 32 + 1024 * 32; // size: 2 * 32

  // run
  backend->codegen(std::move(IR), &allocInfo)->execute(ctx);

  // check result
  auto H = outputTensor->getHandle();
  // channel 0: in
  EXPECT_EQ(H.at({0, 0, 0, 0}), 1);
  EXPECT_EQ(H.at({0, 0, 0, 1}), 2);
  EXPECT_EQ(H.at({0, 0, 0, 2}), 3);
  EXPECT_EQ(H.at({0, 0, 1, 0}), 4);
  EXPECT_EQ(H.at({0, 0, 1, 1}), 5);
  EXPECT_EQ(H.at({0, 0, 1, 2}), 6);
  EXPECT_EQ(H.at({0, 0, 2, 0}), 7);
  EXPECT_EQ(H.at({0, 0, 2, 1}), 8);
  EXPECT_EQ(H.at({0, 0, 2, 2}), 9);

  // channel 1: 2 * in + 1
  EXPECT_EQ(H.at({0, 1, 0, 0}), 3);
  EXPECT_EQ(H.at({0, 1, 0, 1}), 5);
  EXPECT_EQ(H.at({0, 1, 0, 2}), 7);
  EXPECT_EQ(H.at({0, 1, 1, 0}), 9);
  EXPECT_EQ(H.at({0, 1, 1, 1}), 11);
  EXPECT_EQ(H.at({0, 1, 1, 2}), 13);
  EXPECT_EQ(H.at({0, 1, 2, 0}), 15);
  EXPECT_EQ(H.at({0, 1, 2, 1}), 17);
  EXPECT_EQ(H.at({0, 1, 2, 2}), 19);

  // channel 2: 3 * in + 2
  EXPECT_EQ(H.at({0, 2, 0, 0}), 5);
  EXPECT_EQ(H.at({0, 2, 0, 1}), 8);
  EXPECT_EQ(H.at({0, 2, 0, 2}), 11);
  EXPECT_EQ(H.at({0, 2, 1, 0}), 14);
  EXPECT_EQ(H.at({0, 2, 1, 1}), 17);
  EXPECT_EQ(H.at({0, 2, 1, 2}), 20);
  EXPECT_EQ(H.at({0, 2, 2, 0}), 23);
  EXPECT_EQ(H.at({0, 2, 2, 1}), 26);
  EXPECT_EQ(H.at({0, 2, 2, 2}), 29);
}
Module mod; + Function *F = mod.createFunction("TestLIRFC"); + + // L Matrix = M x K + // R Matrix = K x N + // Y Matrix = M x N + // B = N + const unsigned M = 1; + const unsigned K = 1024; + const unsigned N = 1024; + + auto *var_fc_l = mod.createPlaceholder(glow::ElemKind::Int8QTy, {M, K}, 1, 0, + "fc.l", false); + auto *var_fc_r = + mod.createConstant(glow::ElemKind::Int8QTy, {K, N}, 1, 0, "fc.r"); + auto *var_fc_b = + mod.createConstant(glow::ElemKind::Int16QTy, {N}, 1, 0, "fc.b"); + + // Initialize Constant + + // K x N 8, 8, 8, ..., + // 8, 8, 8, ..., + // 8, 8, 8, ..., + // ... + // -8, -8, -8, ..., + // -8, -8, -8, ..., + // -8, -8, -8, ..., + // + + for (unsigned i = 0; i < K; i++) { + for (unsigned j = 0; j < N; j++) { + int val; + if (i < (K / 2 + 1)) { + val = 8; + } else { + val = -8; + } + var_fc_r->getHandle().at({i, j}) = val; + } + } + + // { 2, 2, 2, 2, ..., 0, 0, 0} + for (unsigned i = 0; i < N; i++) { + int val; + if ((i / 32) % 2 == 0) { + val = 2; + } else { + val = 0; + } + var_fc_b->getHandle().at({i}) = val; + } + + Context ctx; + auto *ctx_fc_l = ctx.allocate(var_fc_l); + + // Initialize Placeholder + for (unsigned i = 0; i < M; i++) { + for (unsigned j = 0; j < K; j++) { + ctx_fc_l->getHandle().at({i, j}) = 1; + } + } + + // 1. create glow HIR + TypeRef ty = mod.uniqueType(glow::ElemKind::Int8QTy, {M, N}, 1, 0); + auto *fc = F->addNode(new SophonFullyConnectedQ8Node( + "fc", ty, var_fc_l, var_fc_r, var_fc_b, false, 0, 0, false)); + + auto *save = F->createSave("save", fc); + auto *savePlaceholder = save->getPlaceholder(); + savePlaceholder->setName("save_output"); + auto *outputTensor = ctx.allocate(savePlaceholder); + + // 2. create glow LIR + auto IR = llvm::make_unique(F); + IR->generateIR(); + glow::optimize(*IR, true); + + // 3. 
Hand-coded for Global Memory Allocation + BM1880AllocationsInfo allocInfo; + auto &mem_lut = + allocInfo.getAllocatedAddress(); // Memory Look-up Table for Codegen + // For Global Neuron + mem_lut[IR->getVariableMap()[var_fc_l]] = 0x0; // size: M x K + mem_lut[IR->getVariableMap()[savePlaceholder]] = M * K; // size: M x N + // For Global Weight + mem_lut[IR->getVariableMap()[var_fc_r]] = 0x0; // size: K x N + mem_lut[IR->getVariableMap()[var_fc_b]] = K * N; // size: 2 X N + + // 4. run backend flow + std::unique_ptr backend(new BM1880Backend()); + backend->reorderWeights(IR.get()); + sophon::runInsertLoadStorePass(IR.get()); + BM1880ExpandSophonInst(IR.get(), allocInfo).run(); + + // 5. Hand-coded for Local Memory allocation + // expected MI LIR + // 0 %fc_save_output = allocactivation { Ty: i8[S:1.0000 + // O:0][-128.000,127.000]<1 x 1024>} 1 %fc_fc_l = allocactivation { Ty: + // i8[S:1.0000 O:0][-128.000,127.000]<1 x 1024>} 2 %fc_fc_l_load1 = + // sophonmigdmaglobaltolocal @out %fc_fc_l, @in %fc_l { ShapeNCHW: [1, 32, 1, + // 32], GlobalStrideNCH: [1024, 32, 32], IsGlobalWeightSpace: 0, + // IsLocalAligned: 1} 3 %fc_fc_r = allocactivation { Ty: i8[S:1.0000 + // O:0][-128.000,127.000]<1024 x 1024>} 4 %fc_fc_r_load1 = + // sophonmigdmaglobaltolocal @out %fc_fc_r, @in %fc_r { ShapeNCHW: [1024, 32, + // 1, 32], GlobalStrideNCH: [1024, 32, 32], IsGlobalWeightSpace: 1, + // IsLocalAligned: 1} 5 %fc_fc_b = allocactivation { Ty: i16[S:1.0000 + // O:0][-32768.000,32767.000]<1024>} 6 %fc_fc_b_load1 = + // sophonmigdmaglobaltolocal @out %fc_fc_b, @in %fc_b { ShapeNCHW: [2, 32, 1, + // 32], GlobalStrideNCH: [1024, 32, 32], IsGlobalWeightSpace: 1, + // IsLocalAligned: 0} + + IR->dump(); + // f_save_output + mem_lut[llvm::cast(Inst(IR, 0))] = 0; // size: 1 x 32 + // fc_l + mem_lut[llvm::cast(Inst(IR, 1))] = 1 * 32; // size: 1 x 32 + // fc_r + mem_lut[llvm::cast(Inst(IR, 3))] = 2 * 32; // size: 1024 x 32 + // fc_b + mem_lut[llvm::cast(Inst(IR, 5))] = 2 * 32 + 1024 * 32; // 
// Checks the local-memory size that BM1880TargetTransformInfo reports for
// each operand (input, weight, output, bias) of a SophonConvolutionQ8Inst
// built directly with IRBuilder.
//
// NOTE(review): the operand shapes are not a consistent convolution (a 3x3
// input with a 3x3 kernel and no padding would give a 1x1 output, and the
// weight is {64, 3, 3, 3} while the input has 64 channels and the output 3).
// Only the per-operand sizes are asserted, so this presumably does not
// matter for the test — confirm.
TEST(BM1880GetLMemSizeTest, LIRConv) {
  Module mod;
  Function *F = mod.createFunction("TestGetLMemSize");
  auto IR = llvm::make_unique(F);
  IRBuilder bb(IR.get());

  // Helpers producing quantized types with scale 1.0 and offset 0.
  auto q8 = [&](llvm::ArrayRef dims) {
    return mod.uniqueType(glow::ElemKind::Int8QTy, dims, 1.0, 0);
  };
  auto q16 = [&](llvm::ArrayRef dims) {
    return mod.uniqueType(glow::ElemKind::Int16QTy, dims, 1.0, 0);
  };
  auto *lmem_in = bb.createAllocActivationInst("tensor.in", q8({1, 64, 3, 3}));
  auto *lmem_weight = bb.createAllocActivationInst("weight", q8({64, 3, 3, 3}));
  auto *lmem_out = bb.createAllocActivationInst("tensor.out", q8({1, 3, 3, 3}));
  auto *lmem_bias =
      bb.createAllocActivationInst("bias", q16({64})); // <2, 64, 1, 1>

  // Convolution attributes: no padding, unit stride and dilation, no shift,
  // no relu.
  unsigned int pad_top, pad_left, pad_bottom, pad_right;
  unsigned int dilation_h, dilation_w;
  unsigned int stride_h, stride_w;
  int rshift_width;
  bool enable_relu = false;

  pad_top = pad_left = pad_bottom = pad_right = 0;
  dilation_h = dilation_w = 1;
  stride_h = stride_w = 1;
  rshift_width = 0;

  // The instruction itself is not inspected afterwards; it only ties the
  // four allocations together as operands of one convolution.
  bb.createSophonConvolutionQ8Inst(
      "conv1", lmem_out, lmem_in, lmem_weight, lmem_bias, {stride_h, stride_w},
      {pad_top, pad_left, pad_bottom, pad_right}, {dilation_h, dilation_w},
      rshift_width, enable_relu);

  auto *TTI = sophon::BM1880TargetTransformInfo::getInstance();
  size_t sz_in = TTI->getLMemSizeFromValue(lmem_in);
  size_t sz_out = TTI->getLMemSizeFromValue(lmem_out);
  size_t sz_weight = TTI->getLMemSizeFromValue(lmem_weight);
  size_t sz_bias = TTI->getLMemSizeFromValue(lmem_bias);
  EXPECT_EQ(sz_in,
            16 * 2); // channel size = 3*3 aligned to 16, 2 channels per lane
  EXPECT_EQ(sz_weight, 64 * 3 * 3); // channel size = 3*3*64, 1 channel per lane
  EXPECT_EQ(sz_out,
            16 * 1); // channel size = 3*3 aligned to 16, 1 channel per lane
  EXPECT_EQ(sz_bias, 2 * 2); // channel size = 2*1*1, 2 channel per lane
}
800 * 1 * 32, one channel per lane + EXPECT_EQ(sz_Y, 32); // channel size = 1 * 1 * 32, one channel per lane + EXPECT_EQ(sz_bias, 32 * 2); // channel size = 2 * 1 * 32, one channel per lane +} diff --git a/tests/unittests/Sophon/Backends/BM1880HIRSliceTest.cpp b/tests/unittests/Sophon/Backends/BM1880HIRSliceTest.cpp new file mode 100644 index 0000000000..cf2712e7da --- /dev/null +++ b/tests/unittests/Sophon/Backends/BM1880HIRSliceTest.cpp @@ -0,0 +1,162 @@ +#include "glow/Base/Type.h" +#include "glow/IR/IR.h" +#include "glow/IR/IRBuilder.h" +#include "glow/IR/Instrs.h" + +#include "glow/Graph/Context.h" +#include "glow/Graph/Graph.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Casting.h" +#include "gtest/gtest.h" + +using namespace glow; + +template class SD; +inline static int idiv_round(int pNumerator, int pDenominator) { + return (pNumerator + pDenominator - 1) / pDenominator; +} + +void sliceWeight(SophonConvolutionQ8Node *node, unsigned oh_max_slice) {} + +void sliceData(SophonConvolutionQ8Node *node, unsigned oh_max_slice) { + Function *F = node->getParent(); + auto dim_i = node->getInput().getType()->dims(); + auto dim_f = node->getFilter().getType()->dims(); + auto dim_r = node->getResult().getType()->dims(); + ElemKind elemtype = node->getResult().getType()->getElementType(); + + auto stride = node->getStrideHW(); + auto padTLBR = node->getPadTLBR(); + auto dilation = node->getDilationHW(); + + // create new Node + unsigned batch_sz = dim_i[0]; + unsigned ic = dim_i[1]; + unsigned ih = dim_i[2]; + unsigned iw = dim_i[3]; + + unsigned oc = dim_r[1]; + unsigned oh = dim_r[2]; + unsigned ow = dim_r[3]; + + unsigned kh = dim_f[2]; + unsigned stride_h = stride[0]; + std::vector pad_tlbr{1, padTLBR[1], 1, padTLBR[3]}; + std::vector slice_result; + unsigned ohsec = idiv_round(oh, oh_max_slice); + + for (int sec = 0; sec < ohsec; sec++) { + // Get current padding + + if (sec == 0) { + // First H slice + pad_tlbr[0] = padTLBR[0]; + pad_tlbr[2] = 0; + } 
else if (sec == (ohsec - 1)) { + // Last H slice + pad_tlbr[0] = 0; + pad_tlbr[2] = padTLBR[2]; + } else { + // Middle H slice + pad_tlbr[0] = 0; + pad_tlbr[2] = 0; + } + + unsigned oh_idx = sec * oh_max_slice; + unsigned oh_slice = oh_max_slice; + + // Last slice may be smaller + if (sec == ohsec - 1) { + oh_slice = oh - oh_idx; + } + + unsigned cur_slice_pad = pad_tlbr[0] + pad_tlbr[2]; + unsigned pad_begin = padTLBR[0]; + + unsigned ih_slice = (oh_slice - 1) * stride_h + kh - cur_slice_pad; + + // Make sure idx >= 0 + unsigned ih_idx = + std::max(0, static_cast(oh_idx * stride_h - pad_begin)); + + // Create input slice + std::array islice_shape{batch_sz, ic, ih_slice, iw}; + std::array oslice_shape{batch_sz, oc, oh_slice, ow}; + + auto type_islice = F->getParent()->uniqueType(elemtype, islice_shape, 1, 0); + + auto input = node->getInput(); + auto input_name = input.getNode()->getName(); + auto *node_input = F->addNode( + new SliceNode(input_name, type_islice, input, {0, 0, ih_idx, 0})); + auto nodevalue_filter = node->getFilter(); + auto nodevalue_bias = node->getBias(); + auto *type_output = + F->getParent()->uniqueType(elemtype, oslice_shape, 1, 0); + auto *node_conv = F->addNode(new SophonConvolutionQ8Node( + node->getName(), type_output, node_input, nodevalue_filter, + nodevalue_bias, stride, pad_tlbr, dilation, 0, false)); + + slice_result.push_back(node_conv->getResult()); + } + + // Concat all slices at H axis + auto result_name = node->getResult().getNode()->getName(); + auto *node_concat = F->createConcat(result_name, slice_result, 2); + NodeValue(node, 0).replaceAllUsesOfWith(node_concat); + + // Remove original node + F->eraseNode(node); +} + +TEST(BM1880HIRSliceTest, ConvQ8SliceData) { + Module mod; + Function *F = mod.createFunction("TestHIRSlice"); + + ElemKind dataty = ElemKind::Int8QTy; + SophonConvolutionQ8Node *node_conv; + { + std::vector stride_hw{2, 2}; + std::vector pad_tlbr{3, 3, 3, 3}; + std::vector dilation_hw{1, 1}; + + std::array 
shape_input{1, 3, 224, 224}; + std::array shape_output{1, 64, 112, 112}; + std::array shape_kernel{3, 64, 7, 7}; + std::array shape_bias{64}; + + auto *node_input = + mod.createPlaceholder(dataty, shape_input, 1, 0, "conv.in", false); + auto *node_filter = + mod.createConstant(dataty, shape_kernel, 1, 0, "conv.f"); + auto *node_bias = mod.createConstant(dataty, shape_bias, 1, 0, "conv.b"); + auto *type_output = mod.uniqueType(dataty, shape_output, 1, 0); + + unsigned rshift_width = 0; + bool enable_relu = false; + node_conv = F->addNode(new SophonConvolutionQ8Node( + "conv.1", type_output, node_input, node_filter, node_bias, stride_hw, + pad_tlbr, dilation_hw, rshift_width, enable_relu)); + } + { + std::vector kernel_hw{3, 3}; + std::vector stride_hw{2, 2}; + std::vector pad_tlbr{0, 0, 0, 0}; + std::array shape_output{1, 64, 56, 56}; + auto *type_output = mod.uniqueType(dataty, shape_output, 1, 0); + unsigned rshift = 0; + unsigned mul = 1; + bool round_mode = true; + F->addNode(new SophonMaxPoolQ8Node("maxpool.1", type_output, node_conv, + kernel_hw, stride_hw, pad_tlbr, rshift, + mul, true)); + } + + F->dumpDAG("conv_before.dot"); + sliceData(node_conv, 38); + F->dumpDAG("conv_after.dot"); + + auto IR = llvm::make_unique(F); + IR->generateIR(); + IR->dump(); +} diff --git a/tests/unittests/Sophon/Backends/BM1880MemoryAllocTest.cpp b/tests/unittests/Sophon/Backends/BM1880MemoryAllocTest.cpp new file mode 100644 index 0000000000..8c971f0b8b --- /dev/null +++ b/tests/unittests/Sophon/Backends/BM1880MemoryAllocTest.cpp @@ -0,0 +1,149 @@ +#include "Backends/Sophon/BM188x/BM1880AllocationsInfo.h" +#include "Backends/Sophon/BM188x/BM1880Backend.h" +#include "Backends/Sophon/BM188x/BM1880CodeGen.h" +#include "Backends/Sophon/BM188x/BM1880ExpandSophonInst.h" +#include "Backends/Sophon/BM188x/BM1880InsertLoadStorePass.h" + +#include "glow/Base/Type.h" +#include "glow/Graph/Context.h" +#include "glow/Graph/Graph.h" +#include "glow/IR/IRBuilder.h" +#include 
// Same 3-output-channel convolution as the hand-allocated codegen test, but
// here addresses come from BM1880Backend::runOptimizationPasses. The test
// first compares every allocated address against a table of expected
// values, then runs codegen + execute and checks every output element.
//
// Each 3x3 filter has a single non-zero center tap (1, 2, 3) and the biases
// are (0, 1, 2), so the expected output channels are in, 2*in+1 and 3*in+2.
TEST(BM1880MemAllocTest, ConvMemAllocRun) {
  Module mod;
  Function *F = mod.createFunction("TestConvQ8Run");

  // The save placeholder is renamed to "save_output" further down: per the
  // original note, importers add a "save_" prefix and the Sophon backend
  // currently relies on that prefix to recognize input/output placeholders.
  auto *in_var = mod.createPlaceholder(glow::ElemKind::Int8QTy, {1, 1, 3, 3}, 1,
                                       0, "input", false);
  auto *conv_w_var = mod.createConstant(glow::ElemKind::Int8QTy, {3, 1, 3, 3},
                                        1, 0, "conv1.w");
  auto *conv_b_var =
      mod.createConstant(glow::ElemKind::Int16QTy, {3}, 1, 0, "conv1.b");

  // init input/output/weight
  Context ctx;
  auto *inputTensor = ctx.allocate(in_var);
  // clang-format off
  inputTensor->getHandle()= {1, 2, 3, 4, 5, 6, 7, 8, 9};
  conv_w_var->getHandle() = {
    0, 0, 0, 0, 1, 0, 0, 0, 0,
    0, 0, 0, 0, 2, 0, 0, 0, 0,
    0, 0, 0, 0, 3, 0, 0, 0, 0
  };
  conv_b_var->getHandle() = {0, 1, 2};
  // clang-format on

  DEBUG_GLOW(inputTensor->getHandle().dump());
  DEBUG_GLOW(conv_w_var->getHandle().dump());
  DEBUG_GLOW(conv_b_var->getHandle().dump());

  // create glow HIR: conv (stride 1x1, pad 1 on every side, dilation 1x1,
  // rshift 0, no relu) followed by a save node.
  TypeRef ty = mod.uniqueType(glow::ElemKind::Int8QTy, {1, 3, 3, 3}, 1, 0);
  auto *conv = F->addNode(
      new SophonConvolutionQ8Node("conv", ty, in_var, conv_w_var, conv_b_var,
                                  {1, 1}, {1, 1, 1, 1}, {1, 1}, 0, false));
  auto *save = F->createSave("save", conv);
  auto *savePlaceholder = save->getPlaceholder();
  savePlaceholder->setName("save_output");
  auto *outputTensor = ctx.allocate(savePlaceholder);

  // create glow LIR (not run through glow::optimize in this test)
  auto IR = llvm::make_unique(F);
  IR->generateIR();

  // run backend flow; the passes fill alloc_info with addresses.
  std::unique_ptr backend(new BM1880Backend());
  BM1880AllocationsInfo alloc_info(ctx, backend->getTTI());
  backend->runOptimizationPasses(IR.get(), &alloc_info);

  // check alloc info
  // [conv_conv1_b] = 128
  // [conv1_b] = 27
  // [save_output] = 9
  // [conv_conv1_w] = 64
  // [conv1_w] = 0
  // [conv_res] = 192
  // [conv_input] = 0
  // [input] = 0
  std::map result = {
      {"conv1_b", 27}, {"conv_conv1_b", 128}, {"save_output", 9},
      {"conv_conv1_w", 64}, {"conv1_w", 0}, {"input", 0},
      {"conv_res", 192}, {"conv_input", 0}};
  auto &mem_lut =
      alloc_info.getAllocatedAddress(); // Memory Look-up Table for Codegen
  // NOTE(review): result[...] default-inserts 0 for a name that is not in
  // the table, so an unexpected entry allocated at address 0 would still
  // pass, and entries missing from mem_lut are never noticed — consider a
  // find()-based lookup plus a size check.
  for (auto entry : mem_lut) {
    EXPECT_EQ(result[entry.first->getName().str()], entry.second);
    std::cout << "[" << entry.first->getName().str() << "] = " << entry.second
              << "\n";
  }

  // Reference dump kept for readers; it is compiled out.
#if 0
  // after ExpandSophonInst
  %conv1_w(0) = WeightVar i8[S:1.0000 O:0][-128.000,127.000]<1 x 3 x 3 x 3> const // size: 27 // Users: @in 4
  %conv1_b(27) = WeightVar i16[S:1.0000 O:0][-32768.000,32767.000]<3> const // size: 6 // Users: @in 6
  %input(0) = WeightVar i8[S:1.0000 O:0][-128.000,127.000]<1 x 1 x 3 x 3> mutable // size: 9 // Users: @in 2
  %save_output(9) = WeightVar i8[S:1.0000 O:0][-128.000,127.000]<1 x 3 x 3 x 3> mutable // size: 27 // Users: @out 8

  0 %conv_res(192) = allocactivation { Ty: i8[S:1.0000 O:0][-128.000,127.000]<1 x 3 x 3 x 3>} // size: 27 // Users: @in 8, @out 7, @out 9
  1 %conv_input(0) = allocactivation { Ty: i8[S:1.0000 O:0][-128.000,127.000]<1 x 1 x 3 x 3>} // size: 9 // Users: @in 7, @out 2, @out 10
  2 %conv_input_load1 = sophonmigdmaglobaltolocal @out %conv_input(16), @in %input { ShapeNCHW: [1, 1, 3, 3], GlobalStrideNCH: [9, 9, 3], IsGlobalWeightSpace: 0, IsLocalAligned: 1}
  3 %conv_conv1_w(64) = allocactivation { Ty: i8[S:1.0000 O:0][-128.000,127.000]<1 x 3 x 3 x 3>} // size: 27 // Users: @in 7, @out 4, @out 11
  4 %conv_conv1_w_load1 = sophonmigdmaglobaltolocal @out %conv_conv1_w(32), @in %conv1_w { ShapeNCHW: [1, 3, 9, 1], GlobalStrideNCH: [27, 9, 1], IsGlobalWeightSpace: 1, IsLocalAligned: 0}
  5 %conv_conv1_b(128) = allocactivation { Ty: i16[S:1.0000 O:0][-32768.000,32767.000]<3>} // size: 6 // Users: @in 7, @out 6, @out 12
  6 %conv_conv1_b_load1 = sophonmigdmaglobaltolocal @out %conv_conv1_b, @in %conv1_b { ShapeNCHW: [2, 3, 1, 1], GlobalStrideNCH: [3, 1, 1], IsGlobalWeightSpace: 1, IsLocalAligned: 0}
  7 %conv = sophonmiconvolutionq8 @out %conv_res(192), @in %conv_input(0), @in %conv_conv1_w(64), @in %conv_conv1_b(128) { StrideHW: [1, 1], PadTLBR: [1, 1, 1, 1], DilationHW: [1, 1], RShiftWidth: 0, EnableRelu: 0, StreamID: 0, InstID: 0, Depends: []}
  8 %save11 = sophonmigdmalocaltoglobal @out %save_output, @in %conv_res { ShapeNCHW: [1, 3, 3, 3], GlobalStrideNCH: [27, 9, 3], IsGlobalWeightSpace: 0, IsLocalAligned: 1}
#endif

  // codegen and run
  backend->codegen(std::move(IR), &alloc_info)->execute(ctx);

  // check result
  auto H = outputTensor->getHandle();
  // channel 0: in
  EXPECT_EQ(H.at({0, 0, 0, 0}), 1);
  EXPECT_EQ(H.at({0, 0, 0, 1}), 2);
  EXPECT_EQ(H.at({0, 0, 0, 2}), 3);
  EXPECT_EQ(H.at({0, 0, 1, 0}), 4);
  EXPECT_EQ(H.at({0, 0, 1, 1}), 5);
  EXPECT_EQ(H.at({0, 0, 1, 2}), 6);
  EXPECT_EQ(H.at({0, 0, 2, 0}), 7);
  EXPECT_EQ(H.at({0, 0, 2, 1}), 8);
  EXPECT_EQ(H.at({0, 0, 2, 2}), 9);

  // channel 1: 2 * in + 1
  EXPECT_EQ(H.at({0, 1, 0, 0}), 3);
  EXPECT_EQ(H.at({0, 1, 0, 1}), 5);
  EXPECT_EQ(H.at({0, 1, 0, 2}), 7);
  EXPECT_EQ(H.at({0, 1, 1, 0}), 9);
  EXPECT_EQ(H.at({0, 1, 1, 1}), 11);
  EXPECT_EQ(H.at({0, 1, 1, 2}), 13);
  EXPECT_EQ(H.at({0, 1, 2, 0}), 15);
  EXPECT_EQ(H.at({0, 1, 2, 1}), 17);
  EXPECT_EQ(H.at({0, 1, 2, 2}), 19);

  // channel 2: 3 * in + 2
  EXPECT_EQ(H.at({0, 2, 0, 0}), 5);
  EXPECT_EQ(H.at({0, 2, 0, 1}), 8);
  EXPECT_EQ(H.at({0, 2, 0, 2}), 11);
  EXPECT_EQ(H.at({0, 2, 1, 0}), 14);
  EXPECT_EQ(H.at({0, 2, 1, 1}), 17);
  EXPECT_EQ(H.at({0, 2, 1, 2}), 20);
  EXPECT_EQ(H.at({0, 2, 2, 0}), 23);
  EXPECT_EQ(H.at({0, 2, 2, 1}), 26);
  EXPECT_EQ(H.at({0, 2, 2, 2}), 29);
}
# Unit tests for the Sophon backends.
# add_glow_sophon_test() is defined in tests/unittests/Sophon/CMakeLists.txt.

include_directories(${bmtap_SOURCE_DIR}/include)
include_directories(${BMNET_GLOW_INCLUDE_DIRS})

########################
# Target
# Sophon_CHIP is a cache variable (default BM1880) restricted in the CMake
# GUI to the entries of Sophon_CHIP_LIST. Only the Sophon_<chip>_ENABLE
# variable of the selected chip is switched on here.
set(Sophon_CHIP_LIST BM1680 BM1682 BM1880 BM1882)
# Turn the ";"-separated list into "A, B, C" for the help string.
string(REPLACE ";" ", " Sophon_CHIP_LIST_HELP "${Sophon_CHIP_LIST}")
set(Sophon_CHIP BM1880 CACHE STRING "chip version: ${Sophon_CHIP_LIST_HELP}")
set_property(CACHE Sophon_CHIP PROPERTY STRINGS ${Sophon_CHIP_LIST})
set(Sophon_${Sophon_CHIP}_ENABLE ON)

# BM1880-specific tests.
if (Sophon_BM1880_ENABLE)
  add_glow_sophon_test(
    NAME BM1880CodeGenTest
    SRCS BM1880CodeGenTest.cpp
    ARGS -enable-layer-group=false)
  add_glow_sophon_test(
    NAME BM1880Expand
    SRCS BM1880Expand.cpp)
  add_glow_sophon_test(
    NAME BM1880ExpandCodeGenTest
    SRCS BM1880ExpandCodeGenTest.cpp)
  add_glow_sophon_test(
    NAME BM1880GetLMemSizeTest
    SRCS BM1880GetLMemSizeTest.cpp)
  add_glow_sophon_test(
    NAME BM1880MemoryAllocTest
    SRCS BM1880MemoryAllocTest.cpp)
  add_glow_sophon_test(
    NAME BM1880DeleteQuantizeNodeTest
    SRCS BM1880DeleteQuantizeNodeTest.cpp)
  add_glow_sophon_test(
    NAME BM1880HIRSliceTest
    SRCS BM1880HIRSliceTest.cpp)
endif()

# Registered for every chip selection.
# NOTE(review): LoadStoreTest.cpp includes headers from
# Backends/Sophon/BM188x/ — confirm it builds when a non-BM188x chip is
# selected.
add_glow_sophon_test(
  NAME LoadStoreTest
  SRCS LoadStoreTest.cpp)
// Fixture for the load/store insertion tests. SetUp builds a
// conv -> relu -> save graph in function "TestHIR", lowers it to IR and runs
// glow::optimize on it; each test then applies Sophon passes to `IR`.
class LoadStoreTest : public ::testing::Test {
protected:
  void SetUp() override {
    F = mod.createFunction("TestHIR");
    // NOTE(review): the placeholders are FloatTy although they feed Q8
    // nodes; presumably irrelevant for the instruction-count checks —
    // confirm.
    auto *input = mod.createPlaceholder(ElemKind::FloatTy, {1, 32, 32, 3},
                                        "input", false);
    ElemKind inputTy = input->getType()->getElementType();
    auto *filter =
        mod.createPlaceholder(inputTy, {16, 5, 5, 3}, "filter", true);
    auto *bias = mod.createPlaceholder(inputTy, {16}, "bias", true);
    auto OT = mod.uniqueType(inputTy, {1, 32, 32, 16});

    // NOTE(review): the dilation argument is {0, 0} here, while the other
    // Sophon tests in this patch pass {1, 1} — confirm 0 is intended.
    auto *C = F->addNode(new SophonConvolutionQ8Node("conv", OT, input, filter,
                                                     bias, {1, 1}, {2, 2, 2, 2},
                                                     {0, 0}, 0, false));
    auto *R = F->addNode(new SophonReluQ8Node("relu", OT, C));
    // S is not used afterwards; the save node only terminates the graph.
    auto *S = F->createSave("ret2", R);
    IR = llvm::make_unique(F);
    IR->generateIR();
    glow::optimize(*IR, true);
  }

  // Sanity check after every test: constructing an IRBuilder over the
  // function must not change the instruction count. Per the original note,
  // the intent is to detect instructions a pass forgot to emit, e.g.
  // deallocactivation.
  // NOTE(review): B is still alive when EXPECT_EQ runs. If IRBuilder only
  // inserts the missing instructions in its destructor, this check can never
  // fail — confirm against IRBuilder's implementation.
  void TearDown() override {
    auto size = IR->getInstrs().size();
    IRBuilder B(IR.get()); // parser Function again for finding missing Inst,
                           // ex. deallocactivation
    EXPECT_EQ(size, IR->getInstrs().size());
  }

  Module mod;            // owns the function and placeholders
  Function *F;           // graph built in SetUp
  std::unique_ptr IR;    // lowered and optimized form of F
};
# add_glow_sophon_test(NAME <name> SRCS <src>... [LIBS <lib>...] [ARGS <arg>...])
#
# Builds the executable <name> from SRCS, links it against the optional LIBS
# plus Sophon, gtest and testMain, and registers it through add_glow_test()
# under the test name sophon_<name>. ARGS are appended to the test command
# line.
function(add_glow_sophon_test)
  # NAME is single-valued; SRCS, LIBS and ARGS accept lists.
  cmake_parse_arguments(ARG "" "NAME" "SRCS;LIBS;ARGS" ${ARGN})
  add_executable(${ARG_NAME} ${ARG_SRCS})
  target_link_libraries(${ARG_NAME} PRIVATE
    ${ARG_LIBS}
    Sophon
    gtest
    testMain)
  add_glow_test(
    NAME sophon_${ARG_NAME}
    COMMAND ${ARG_NAME} ${ARG_ARGS})
endfunction()
// GDMA transfer with a Global output operand and a Local input operand.
// Carries the transfer shape (NCHW), the global-side strides (NCH), and two
// flags: whether the global side is in weight space and whether the local
// side is aligned. Local and Global must have the same element type.
BB.newBackendSpecificInstr("SophonMIGDMALocalToGlobal")
    .addOperand("Global", OperandKind::Out)
    .addOperand("Local", OperandKind::In)
    .addMember(MemberType::VectorUnsigned, "ShapeNCHW")
    .addMember(MemberType::VectorUnsigned, "GlobalStrideNCH")
    .addMember(MemberType::Boolean, "IsGlobalWeightSpace")
    .addMember(MemberType::Boolean, "IsLocalAligned")
    .autoVerify(VerifyKind::SameElementType, {"Local", "Global"});

// BM INT8 INST

// Relu: one output, one input, no attributes.
BB.newBackendSpecificInstr("SophonMIReluQ8")
    .addOperand("Dest", OperandKind::Out)
    .addOperand("Src", OperandKind::In)
    .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"});

// Multiply-accumulate with a constant. The result is split over two output
// operands (DestLow / DestHigh).
// NOTE(review): IsMultiplierSigned is declared Unsigned here and in the two
// MulConst instructions below, while the other flags (e.g. IsResultI8) are
// Boolean — confirm this is intended.
BB.newBackendSpecificInstr("SophonMIMacConstQ8")
    .addOperand("DestLow", OperandKind::Out)
    .addOperand("DestHigh", OperandKind::Out)
    .addOperand("Src", OperandKind::In)
    .addMember(MemberType::Unsigned, "RShiftWidth")
    .addMember(MemberType::Unsigned, "LShiftWidth")
    .addMember(MemberType::Unsigned, "Multiplier")
    .addMember(MemberType::Unsigned, "IsMultiplierSigned")
    .addMember(MemberType::Boolean, "IsResultI8")
    .autoVerify(VerifyKind::SameElementType, {"DestLow", "DestHigh", "Src"});

// Multiply by a constant with a right shift; single output operand.
BB.newBackendSpecificInstr("SophonMIMulConstQ8")
    .addOperand("Dest", OperandKind::Out)
    .addOperand("Src", OperandKind::In)
    .addMember(MemberType::Unsigned, "Multiplier")
    .addMember(MemberType::Unsigned, "IsMultiplierSigned")
    .addMember(MemberType::Unsigned, "RShiftWidth")
    .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"});

// Same members as SophonMIMulConstQ8, but with the result split over
// DestLow / DestHigh.
BB.newBackendSpecificInstr("SophonMIMulConstQ16")
    .addOperand("DestLow", OperandKind::Out)
    .addOperand("DestHigh", OperandKind::Out)
    .addOperand("Src", OperandKind::In)
    .addMember(MemberType::Unsigned, "Multiplier")
    .addMember(MemberType::Unsigned, "IsMultiplierSigned")
    .addMember(MemberType::Unsigned, "RShiftWidth")
    .autoVerify(VerifyKind::SameElementType, {"DestLow", "DestHigh", "Src"});

// Average pooling: kernel, stride and padding (top, left, bottom, right),
// plus a right-shift width.
BB.newBackendSpecificInstr("SophonMIAvgPoolingQ8")
    .addOperand("Dest", OperandKind::Out)
    .addOperand("Src", OperandKind::In)
    .addMember(MemberType::VectorUnsigned, "KernelHW")
    .addMember(MemberType::VectorUnsigned, "StrideHW")
    .addMember(MemberType::VectorUnsigned, "PadTLBR")
    .addMember(MemberType::Unsigned, "RShiftWidth")
    .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"});

// Max pooling: same geometry members as average pooling, no shift.
BB.newBackendSpecificInstr("SophonMIMaxPoolingQ8")
    .addOperand("Dest", OperandKind::Out)
    .addOperand("Src", OperandKind::In)
    .addMember(MemberType::VectorUnsigned, "KernelHW")
    .addMember(MemberType::VectorUnsigned, "StrideHW")
    .addMember(MemberType::VectorUnsigned, "PadTLBR")
    .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"});

// Convolution with filter and bias inputs. Besides the geometry and
// quantization members it carries StreamID, InstID and a Depends list.
// NOTE(review): autoVerify requires Bias to share the element type of
// Dest/Src/Filter, yet the unit tests in this patch use an Int16QTy bias
// with Int8QTy data — confirm how this verification is meant to apply.
BB.newBackendSpecificInstr("SophonMIConvolutionQ8")
    .addOperand("Dest", OperandKind::Out)
    .addOperand("Src", OperandKind::In)
    .addOperand("Filter", OperandKind::In)
    .addOperand("Bias", OperandKind::In)
    .addMember(MemberType::VectorUnsigned, "StrideHW")
    .addMember(MemberType::VectorUnsigned, "PadTLBR")
    .addMember(MemberType::VectorUnsigned, "DilationHW")
    .addMember(MemberType::Unsigned, "RShiftWidth")
    .addMember(MemberType::Boolean, "EnableRelu")
    .addMember(MemberType::Unsigned, "StreamID")
    .addMember(MemberType::Unsigned, "InstID")
    .addMember(MemberType::VectorUnsigned, "Depends")
    .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter", "Bias"});

// Depthwise convolution. Unlike SophonMIConvolutionQ8 it has no DilationHW
// or EnableRelu member.
BB.newBackendSpecificInstr("SophonMIDepthwiseConvolutionQ8")
    .addOperand("Dest", OperandKind::Out)
    .addOperand("Src", OperandKind::In)
    .addOperand("Filter", OperandKind::In)
    .addOperand("Bias", OperandKind::In)
    .addMember(MemberType::VectorUnsigned, "StrideHW")
    .addMember(MemberType::VectorUnsigned, "PadTLBR")
    .addMember(MemberType::Unsigned, "RShiftWidth")
    .addMember(MemberType::Unsigned, "StreamID")
    .addMember(MemberType::Unsigned, "InstID")
    .addMember(MemberType::VectorUnsigned, "Depends")
    .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter", "Bias"});

// Fully connected: right/left shift widths and a ResultAdd flag.
BB.newBackendSpecificInstr("SophonMIFCQ8")
    .addOperand("Dest", OperandKind::Out)
    .addOperand("Src", OperandKind::In)
    .addOperand("Filter", OperandKind::In)
    .addOperand("Bias", OperandKind::In)
    .addMember(MemberType::Unsigned, "RShiftWidth")
    .addMember(MemberType::Unsigned, "LShiftWidth")
    .addMember(MemberType::Boolean, "ResultAdd")
    .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter", "Bias"});

// Declared with exactly the same operands and members as SophonMIFCQ8.
BB.newBackendSpecificInstr("SophonMIFCQ16")
    .addOperand("Dest", OperandKind::Out)
    .addOperand("Src", OperandKind::In)
    .addOperand("Filter", OperandKind::In)
    .addOperand("Bias", OperandKind::In)
    .addMember(MemberType::Unsigned, "RShiftWidth")
    .addMember(MemberType::Unsigned, "LShiftWidth")
    .addMember(MemberType::Boolean, "ResultAdd")
    .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter", "Bias"});
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/// BM1682 +/// TG in old bmnet + +//===--------------------------------------------------------------------===// +// Convolution / Pool / FC +//===--------------------------------------------------------------------===// +BB.newBackendSpecificInstr("SophonConvolution") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Filter", OperandKind::In) + .addOperand("Bias", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Unsigned, "Group") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter", "Bias"}); + +BB.newBackendSpecificInstr("SophonConvolutionWithoutBias") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Filter", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Unsigned, "Group") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter"}); + +BB.newBackendSpecificInstr("SophonConvolutionQ8") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + 
.addOperand("Filter", OperandKind::In) + .addOperand("Bias", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "StrideHW") + .addMember(MemberType::VectorUnsigned, "PadTLBR") + .addMember(MemberType::VectorUnsigned, "DilationHW") + .addMember(MemberType::Unsigned, "RShiftWidth") + .addMember(MemberType::Boolean, "EnableRelu") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter"}); + +BB.newBackendSpecificInstr("SophonAvgPool") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonAvgPoolQ8") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "KernelHW") + .addMember(MemberType::VectorUnsigned, "StrideHW") + .addMember(MemberType::VectorUnsigned, "PadTLBR") + .addMember(MemberType::Unsigned, "RShiftWidth") + .addMember(MemberType::Unsigned, "Multiplier") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonMaxPool") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonMaxPoolQ8") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "KernelHW") + .addMember(MemberType::VectorUnsigned, "StrideHW") + .addMember(MemberType::VectorUnsigned, "PadTLBR") + .addMember(MemberType::Unsigned, "RShiftWidth") + .addMember(MemberType::Unsigned, "Multiplier") + .autoIRGen() + 
.autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +#if 1 // use SophonMatMul +BB.newBackendSpecificInstr("SophonFullyConnected") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Weights", OperandKind::In) + .addOperand("Bias", OperandKind::In) + .addMember(MemberType::Boolean, "Relu") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, + {"Dest", "Src", "Weights", "Bias"}); +#endif + +BB.newBackendSpecificInstr("SophonFullyConnectedQ8") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Weights", OperandKind::In) + .addOperand("Bias", OperandKind::In) + .addMember(MemberType::Boolean, "Relu") + .addMember(MemberType::Unsigned, "RShiftWidth") + .addMember(MemberType::Unsigned, "LShiftWidth") + .addMember(MemberType::Boolean, "ResultAdd") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Weights"}); + +//===--------------------------------------------------------------------===// +// Normalization +//===--------------------------------------------------------------------===// +BB.newBackendSpecificInstr("SophonNormalize") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + // .addInput("Scale") //!< delete + .addMember(MemberType::Boolean, "AcrossSpatial") + .addMember(MemberType::Boolean, "ChannelShared") + .addMember(MemberType::Float, "Epsilon") + .addMember(MemberType::Float, "Scale") //!< add this + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonBatchNormalization") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + // .addInput("Bias") //!< delete + .addOperand("Mean", OperandKind::In) + .addOperand("Variance", OperandKind::In) + .addMember(MemberType::Float, "Scale") + .addMember(MemberType::Float, "Epsilon") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, + {"Dest", "Src", "Mean", "Variance"}); + 
+BB.newBackendSpecificInstr("SophonBatchNormalizationOpt") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Mean", OperandKind::In) + .addOperand("Variance", OperandKind::In) + .addMember(MemberType::Float, "Scale") + .addMember(MemberType::Float, "Epsilon") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, + {"Dest", "Src", "Mean", "Variance"}); + +BB.newBackendSpecificInstr("SophonLocalResponseNormalization") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Float, "Alpha") + .addMember(MemberType::Float, "Beta") + //.addMember(MemberType::Float, "bias") + .addMember(MemberType::Unsigned, "NormRegion") + .addMember(MemberType::Unsigned, "Size") + .addMember(MemberType::Float, "K") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +//===--------------------------------------------------------------------===// +// Activation +//===--------------------------------------------------------------------===// +BB.newBackendSpecificInstr("SophonRelu") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Float, "NegativeSlope") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonReluQ8") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonSigmoid") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonTanh") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonPrelu") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", 
OperandKind::In) + .addOperand("Slope", OperandKind::In) + .addMember(MemberType::Boolean, "ChannelShared") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Slope"}); + +//===--------------------------------------------------------------------===// +// Other NN operations +//===--------------------------------------------------------------------===// + +BB.newBackendSpecificInstr("SophonSoftMax") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Axis") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonPriorbox") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Weight", OperandKind::In) + // .addMember(MemberType::VectorFloat, "MinSize") + // .addMember(MemberType::VectorFloat, "MaxSize") + // .addMember(MemberType::VectorFloat, "AspectRatio") + // .addMember(MemberType::VectorFloat, "Variance") + .addMember(MemberType::Unsigned, "NumPriors") + .addMember(MemberType::Unsigned, "ImgH") + .addMember(MemberType::Unsigned, "ImgW") + .addMember(MemberType::Float, "StepH") + .addMember(MemberType::Float, "StepW") + .addMember(MemberType::Unsigned, "Clip") + .addMember(MemberType::Unsigned, "Offset") + .addMember(MemberType::Unsigned, "ReducebBoxes") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Weight"}); + +BB.newBackendSpecificInstr("SophonUpsample") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Size") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonDeconvolution") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Filter", OperandKind::In) + .addOperand("Bias", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, 
"Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Unsigned, "Group") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter", "Bias"}); + +BB.newBackendSpecificInstr("SophonDeconvolutionWithoutBias") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Filter", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Unsigned, "Group") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter"}); + +BB.newBackendSpecificInstr("SophonDeconvolutionOpt") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Filter", OperandKind::In) + .addOperand("Bias", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Unsigned, "Group") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter", "Bias"}); + +BB.newBackendSpecificInstr("SophonDeconvolutionWithoutBiasOpt") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Filter", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Unsigned, "Group") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter"}); + +BB.newBackendSpecificInstr("SophonROIPool") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + 
.addOperand("Rois", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "PoolShape") + .addMember(MemberType::Float, "SpatialScale") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Rois"}); + +BB.newBackendSpecificInstr("SophonPSROIPool") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Float, "SpatialScale") + .addMember(MemberType::Unsigned, "Group") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonMultiRegion") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Classes") + .addMember(MemberType::Unsigned, "Coords") + .addMember(MemberType::Unsigned, "Nums") + .addMember(MemberType::VectorUnsigned, "ActivateParameters") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonLSTM") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("W", OperandKind::In) + .addOperand("R", OperandKind::In) + .addOperand("B", OperandKind::In) + .addOperand("P", OperandKind::In) + .addMember(MemberType::Unsigned, "time_num") + .addMember(MemberType::Unsigned, "with_x_static") + .addMember(MemberType::Unsigned, "expose_hidden") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, + {"Dest", "Src", "W", "R", "B", "P"}); + +BB.newBackendSpecificInstr("SophonShuffleChannel") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Group") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +//===--------------------------------------------------------------------===// +// Shape transformations +//===--------------------------------------------------------------------===// +// +BB.newBackendSpecificInstr("SophonConcat") + .addOperand("Dest", OperandKind::Out) + 
//.addMember(MemberType::VectorNodeValue, "Inputs") //delete this, use LHS + // and RHS if concat number>3, use concat 2 times + .addMember(MemberType::Unsigned, "Dim") + //.autoIRGen() + .autoVerify(VerifyKind::NoVerify); + +// FIXME +BB.newBackendSpecificInstr("SophonConcat2") + .addOperand("Dest", OperandKind::Out) + .addOperand("Input0", OperandKind::In) + .addOperand("Input1", OperandKind::In) + .addMember(MemberType::Unsigned, "Dim") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Input0", "Input1"}); + +BB.newBackendSpecificInstr("SophonConcat3") + .addOperand("Dest", OperandKind::Out) + .addOperand("Input0", OperandKind::In) + .addOperand("Input1", OperandKind::In) + .addOperand("Input2", OperandKind::In) + .addMember(MemberType::Unsigned, "Dim") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, + {"Dest", "Input0", "Input1", "Input2"}); + +BB.newBackendSpecificInstr("SophonConcat4") + .addOperand("Dest", OperandKind::Out) + .addOperand("Input0", OperandKind::In) + .addOperand("Input1", OperandKind::In) + .addOperand("Input2", OperandKind::In) + .addOperand("Input3", OperandKind::In) + .addMember(MemberType::Unsigned, "Dim") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, + {"Dest", "Input0", "Input1", "Input2", "Input3"}); + +BB.newBackendSpecificInstr("SophonConcat5") + .addOperand("Dest", OperandKind::Out) + .addOperand("Input0", OperandKind::In) + .addOperand("Input1", OperandKind::In) + .addOperand("Input2", OperandKind::In) + .addOperand("Input3", OperandKind::In) + .addOperand("Input4", OperandKind::In) + .addMember(MemberType::Unsigned, "Dim") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, + {"Dest", "Input0", "Input1", "Input2", "Input3", "Input4"}); + +BB.newBackendSpecificInstr("SophonConcat6") + .addOperand("Dest", OperandKind::Out) + .addOperand("Input0", OperandKind::In) + .addOperand("Input1", OperandKind::In) + .addOperand("Input2", OperandKind::In) + .addOperand("Input3", OperandKind::In) + 
.addOperand("Input4", OperandKind::In) + .addOperand("Input5", OperandKind::In) + .addMember(MemberType::Unsigned, "Dim") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, + {"Dest", "Input0", "Input1", "Input2", "Input3", "Input4", + "Input5"}); + +BB.newBackendSpecificInstr("SophonReshape") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorSizeT, "Dims") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonTranspose") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Shuffle") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonFlatten") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonReorg") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Stride") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonCrop") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Offsets") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonPermute") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Order") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonDummyData") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonEltwise") + .addOperand("Dest", OperandKind::Out) + 
.addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + //.addMember(MemberType::VectorFloat, "Coeff") + .addMember(MemberType::Unsigned, "Operation") + .addMember(MemberType::Boolean, "StableProdGrad") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS"}); + +BB.newBackendSpecificInstr("SophonTile") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + // .addMember(MemberType::VectorFloat, "coeff") + .addMember(MemberType::Unsigned, "Axis") + .addMember(MemberType::Unsigned, "Tiles") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +//===--------------------------------------------------------------------===// +// Arithmetic +//===--------------------------------------------------------------------===// + +BB.newBackendSpecificInstr("SophonMatMul") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addOperand("Slice", OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS", "Slice"}); + +BB.newBackendSpecificInstr("SophonScale") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Scale", OperandKind::In) + .addOperand("Bias", OperandKind::In) + .addMember(MemberType::Unsigned, "Axis") + .addMember(MemberType::Unsigned, "NumAxes") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Scale", "Bias"}); + +BB.newBackendSpecificInstr("SophonScaleWithoutBias") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Scale", OperandKind::In) + .addMember(MemberType::Unsigned, "Axis") + .addMember(MemberType::Unsigned, "NumAxes") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Scale"}); + +BB.newBackendSpecificInstr("SophonScale1") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Bias", 
OperandKind::In) + .addMember(MemberType::Unsigned, "Axis") + .addMember(MemberType::Unsigned, "NumAxes") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Bias"}); + +BB.newBackendSpecificInstr("SophonMul") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS"}); + +BB.newBackendSpecificInstr("SophonAdd") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS"}); + +BB.newBackendSpecificInstr("SophonMax") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS"}); + +BB.newBackendSpecificInstr("SophonPow") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Float, "Power") + .addMember(MemberType::Float, "Scale") + .addMember(MemberType::Float, "Shift") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonSub") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS"}); + +BB.newBackendSpecificInstr("SophonDiv") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS"}); + +//===--------------------------------------------------------------------===// +// Others +//===--------------------------------------------------------------------===// +BB.newBackendSpecificInstr("SophonAbs") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", 
OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonYolo") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember( + MemberType::Unsigned, + "classes") // by ycs: in bmnet-caffe.proto, it's int32? not unsigned? + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonRegion") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember( + MemberType::Unsigned, + "classes") // by ycs: in bmnet-caffe.proto, it's int32? not unsigned? + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonProposal") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "feat_stride") + .addMember(MemberType::Unsigned, "pre_nms_topN") + .addMember(MemberType::Unsigned, "post_nms_topN") + .addMember(MemberType::Float, "nms_thresh") + .addMember(MemberType::Unsigned, "min_size") + .addMember(MemberType::Unsigned, "base_size") + .addMember(MemberType::Unsigned, "version") + // .addMember(MemberType::VectorFloat, "scale") by ycs: can't use + // vectorfloat yet, need fix .addMember(MemberType::VectorFloat, "ratio") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +/****************** +BB.newBackendSpecificInstr("SophonSlice") + .addOperand("Dest", OperandKind::Out) + //.addOperand("Dest1", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Axis") + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Dest1","Src"}); +******************/ + +BB.newBackendSpecificInstr("SophonReduction") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Axis") + .addMember(MemberType::Unsigned, "Operation") + .addMember(MemberType::Float, "Coeff") +
.autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("SophonInterp") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Weights", OperandKind::In) + .autoIRGen() + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Weights"}); + +/// TL for old bmnet +BB.newBackendSpecificInstr("SophonTLActivation") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, "Activation") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLPrelu") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, "Weight") + .addMember(MemberType::Boolean, "ChannelShared") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLConvolution") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, "Weight") + .addMember(MemberType::Unsigned, "Bias") + .addMember(MemberType::Unsigned, "Working") + .addMember(MemberType::Unsigned, "Group") + .addMember(MemberType::Boolean, "DoBias") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::Boolean, "ResultAdd") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLConvolutionWithoutBias") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, 
"Weight") + .addMember(MemberType::Unsigned, "Working") + .addMember(MemberType::Unsigned, "Group") + .addMember(MemberType::Boolean, "DoBias") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::Boolean, "ResultAdd") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLMaxPooling") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, "Working") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLAveragePooling") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, "Working") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLTransportLoad") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Laddr") + .addMember(MemberType::Boolean, "Transpose") + .addMember(MemberType::Boolean, "Aligned") + .addMember(MemberType::Boolean, "IsWeight") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLTransportStore") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Laddr") + .addMember(MemberType::Boolean, "Transpose") + .addMember(MemberType::Boolean, "Aligned") + 
.autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLUpsample") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, "Size") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLLrn") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, "Working") + .addMember(MemberType::Unsigned, "Local_Size") + .addMember(MemberType::Float, "Alpha") + .addMember(MemberType::Float, "Beta") + .addMember(MemberType::Float, "K") + .addMember(MemberType::Unsigned, "Norm_Region") + .addMember(MemberType::Unsigned, "sqr_lut_weight") + .addMember(MemberType::Unsigned, "power_lut_weight") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLScale") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, "Scale") + .addMember(MemberType::Unsigned, "Bias") + .addMember(MemberType::Unsigned, "Scale_Dim") + .addMember(MemberType::Boolean, "Bias_Term") + .addMember(MemberType::Boolean, "If_Relu") + .addMember(MemberType::Float, "Relu_Slope") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLEltwise") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, "Working") + .addMember(MemberType::Unsigned, "Op_Code") + .addMember(MemberType::VectorFloat, "Coeff") + .addMember(MemberType::Boolean, "If_Relu") + .addMember(MemberType::Float, "Relu_Slope") + .autoVerify(VerifyKind::NoVerify); + 
+BB.newBackendSpecificInstr("SophonTLBatchNorm") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, "Mean") + .addMember(MemberType::Unsigned, "Variance") + .addMember(MemberType::Unsigned, "Scale_ma") + .addMember(MemberType::Float, "Eps") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLShuffleChannel") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, "Group") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("SophonTLResizeBilinear") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Input") + .addMember(MemberType::Unsigned, "Output") + .addMember(MemberType::Unsigned, "Weight") + .addMember(MemberType::Unsigned, "Resize_h") + .addMember(MemberType::Unsigned, "Resize_w") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("ParallelEnable").autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("ParallelDisable").autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Alloc_Const") + .addMember(MemberType::Float, "fvalue") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Alloc") + .addMember(MemberType::TypeRef, "Ty") + .addMember(MemberType::Unsigned, "Ctrls") + .setType("Ty") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Alloc_Bank") + .addMember(MemberType::Unsigned, "BankId") + .addMember(MemberType::TypeRef, "Ty") + .addMember(MemberType::Unsigned, "Ctrls") + .setType("Ty") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Prealloc") + .addMember(MemberType::Unsigned, "LAddress") + .addMember(MemberType::TypeRef, "Ty") + .setType("Ty") + 
.autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Prealloc_Align") + .addMember(MemberType::Unsigned, "LAddress") + .addMember(MemberType::TypeRef, "Ty") + .setType("Ty") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Free") + .addOperand("Src", OperandKind::In) + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Load_Stride") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorSizeT, "Strides") + .addMember(MemberType::Unsigned, "Ctrls") + .addMember(MemberType::Unsigned, "GAddress") // FIXME + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Store_Stride") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorSizeT, "Strides") + .addMember(MemberType::Unsigned, "Ctrls") + .addMember(MemberType::Unsigned, "GAddress") // FIXME + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Load") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Ctrls") + .addMember(MemberType::Unsigned, "GAddress") // FIXME + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Store") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Ctrls") + .addMember(MemberType::Unsigned, "GAddress") // FIXME + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Slice") + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "N_Start") + .addMember(MemberType::Unsigned, "N_End") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_CW_Transpose") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Copy_Gdma") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + 
.autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Copy_Stride") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorSizeT, "D_Strides") + .addMember(MemberType::VectorSizeT, "S_Strides") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("G_Copy") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "D_GAddr") + .addMember(MemberType::Unsigned, "S_GAddr") + .addMember(MemberType::VectorUnsigned, "Shape") + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("G_Copy_Stride") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "D_GAddr") + .addMember(MemberType::Unsigned, "S_GAddr") + .addMember(MemberType::VectorUnsigned, "D_Shape") + .addMember(MemberType::VectorSizeT, "D_Strides") + .addMember(MemberType::VectorUnsigned, "S_Shape") + .addMember(MemberType::VectorSizeT, "S_Strides") + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("G_Copy_Stride_Transpose") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "D_GAddr") + .addMember(MemberType::Unsigned, "S_GAddr") + .addMember(MemberType::VectorUnsigned, "D_Shape") + .addMember(MemberType::VectorSizeT, "D_Strides") + .addMember(MemberType::VectorUnsigned, "S_Shape") + .addMember(MemberType::VectorSizeT, "S_Strides") + .addMember(MemberType::Boolean, "Transpose") + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Conv") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Filter", OperandKind::In) + .addOperand("Bias", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Inserts") + .addMember(MemberType::VectorUnsigned, 
"InsertsLast") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorSizeT, "KStrides") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Boolean, "KernelFlip") + .addMember(MemberType::Boolean, "ResultAdd") + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter", "Bias"}); + +BB.newBackendSpecificInstr("TL_Conv_Without_Bias") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Filter", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Inserts") + .addMember(MemberType::VectorUnsigned, "InsertsLast") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorSizeT, "KStrides") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Boolean, "KernelFlip") + .addMember(MemberType::Boolean, "ResultAdd") + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src", "Filter"}); + +BB.newBackendSpecificInstr("TL_MaxPool") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Inserts") + .addMember(MemberType::VectorUnsigned, "InsertsLast") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("TL_AvgPool") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Inserts") + .addMember(MemberType::VectorUnsigned, "InsertsLast") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + 
.addMember(MemberType::Float, "AvgConst") + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("TL_MaxPool_Bwd") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addOperand("Index", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Inserts") + .addMember(MemberType::VectorUnsigned, "InsertsLast") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Pads") + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("TL_AvgPool_Bwd") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::VectorUnsigned, "Inserts") + .addMember(MemberType::VectorUnsigned, "InsertsLast") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::Float, "AvgConst") + .autoVerify(VerifyKind::SameElementType, {"Dest", "Src"}); + +BB.newBackendSpecificInstr("TL_Matrix_Mac") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS"}); + +BB.newBackendSpecificInstr("TL_Matrix_Mac_Bias") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addOperand("Bias", OperandKind::In) + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS", "Bias"}); + +BB.newBackendSpecificInstr("TL_Mac") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Mac_Bias") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + 
.addOperand("Bias", OperandKind::In) + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Max") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .autoVerify(VerifyKind::NoVerify); + +// FIXME +BB.newBackendSpecificInstr("TL_Max1") + .addOperand("LHS", OperandKind::InOut) // Result is same with LHS + .addOperand("RHS", OperandKind::In) + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Cmp") + .addOperand("R_AB", OperandKind::Out) + .addOperand("R_CD", OperandKind::Out) + .addOperand("SRC_A", OperandKind::In) + .addOperand("SRC_B", OperandKind::In) + .addOperand("SRC_C", OperandKind::In) + .addOperand("SRC_D", OperandKind::In) + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Mul") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Mul_Stride") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addMember(MemberType::VectorSizeT, "D_Strides") + .addMember(MemberType::VectorSizeT, "L_Strides") + .addMember(MemberType::VectorSizeT, "R_Strides") + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS"}); + +BB.newBackendSpecificInstr("TL_Reshape") + .addOperand("Src", OperandKind::InOut) + .addMember(MemberType::VectorSizeT, "Shape") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Lrn_Shift") + .addOperand("Src", OperandKind::In) + .addOperand("Dest", OperandKind::Out) + .addMember(MemberType::Boolean, "Right_shift") + .addMember(MemberType::Unsigned, "Lrn_step") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Div") + .addOperand("Dest", OperandKind::Out) + 
.addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Div_Stride") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addMember(MemberType::VectorSizeT, "D_Strides") + .addMember(MemberType::VectorSizeT, "L_Strides") + .addMember(MemberType::VectorSizeT, "R_Strides") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Add") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS"}); + +BB.newBackendSpecificInstr("TL_Add_Stride") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addMember(MemberType::VectorSizeT, "D_Strides") + .addMember(MemberType::VectorSizeT, "L_Strides") + .addMember(MemberType::VectorSizeT, "R_Strides") + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS"}); + +BB.newBackendSpecificInstr("TL_Sub") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS"}); + +BB.newBackendSpecificInstr("TL_Sub_Stride") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addMember(MemberType::VectorSizeT, "D_Strides") + .addMember(MemberType::VectorSizeT, "L_Strides") + .addMember(MemberType::VectorSizeT, "R_Strides") + .autoVerify(VerifyKind::SameElementType, {"Dest", "LHS", "RHS"}); + +BB.newBackendSpecificInstr("TL_Sum") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Ctrls") + 
.autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Img_Sum") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Xa") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Float, "Const_a") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Ex") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Lnx") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Rsq") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Xn") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .addMember(MemberType::Float, "Const_n") // This argument should be int. 
+ .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Arithmetic") + .addOperand("Dest", OperandKind::Out) + .addOperand("LHS", OperandKind::In) + .addOperand("RHS", OperandKind::In) + .addMember(MemberType::Unsigned, "Op") + .addMember(MemberType::Unsigned, "Ctrls") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Copy") + .addOperand("Dest", OperandKind::Out) + .addOperand("Src", OperandKind::In) + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Fill") + .addOperand("Dest", OperandKind::Out) + .addMember(MemberType::Float, "Val") + .autoVerify(VerifyKind::NoVerify); + +BB.newBackendSpecificInstr("TL_Cpu_Op") + .addMember(MemberType::String, "Op_Name") + .addMember(MemberType::String, "Payload") + .addMember(MemberType::Unsigned, "Size") + .autoVerify(VerifyKind::NoVerify); + +#include "SophonMI.h" +#include "SophonOpInstrs.h" + +/// verification +BB.includeBackendSpecificVerification("SophonSpecificInstrsVerification.h"); diff --git a/tools/ClassGen/Backends/Sophon/SophonSpecificInstrsVerification.h b/tools/ClassGen/Backends/Sophon/SophonSpecificInstrsVerification.h new file mode 100644 index 0000000000..3696520908 --- /dev/null +++ b/tools/ClassGen/Backends/Sophon/SophonSpecificInstrsVerification.h @@ -0,0 +1,63 @@ +/** + * Copyright (c) 2017-present, Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +void SophonConvolutionQ8Node::verify() const { + // TBD +} + +void SophonFullyConnectedQ8Node::verify() const { + // TBD +} + +void SophonReluQ8Node::verify() const { + // TBD +} + +void SophonMaxPoolQ8Node::verify() const { + // TBD +} + +void SophonAvgPoolQ8Node::verify() const { + // TBD +} + +void SophonLoadInst::verify() const { + // TBD +} + +void SophonStoreInst::verify() const { + // TBD +} + +#if 0 +void SophonFakeParallelStartInst::verify() const {} +void SophonFakeParallelEndInst::verify() const {} + +void TL_AllocInst::verify() const {} +void SophonDeallocLocalTensorInst::verify() const {} + +void SophonLoadStrideInst::verify() const {} +void SophonLoadInst::verify() const {} +void SophonStoreStrideInst::verify() const {} +void SophonStoreInst::verify() const {} + +void SophonLocalMacInst::verify() const {} +void SophonLocalMaxInst::verify() const {} +void SophonLocalCmpInst::verify() const {} +void SophonLocalMulInst::verify() const {} + +void SophonLocalReshapeInst::verify() const {} +#endif diff --git a/tools/ClassGen/Backends/Sophon/SophonSpecificNodes.h b/tools/ClassGen/Backends/Sophon/SophonSpecificNodes.h new file mode 100644 index 0000000000..6c48addb6b --- /dev/null +++ b/tools/ClassGen/Backends/Sophon/SophonSpecificNodes.h @@ -0,0 +1,636 @@ +/** + * Copyright (c) 2017-present, Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +//===--------------------------------------------------------------------===// +// Convolution / Pool / FC +//===--------------------------------------------------------------------===// + +BB.newNode("SophonConvolution") + .addInput("Input") + .addInput("Filter") + .addInput("Bias") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Unsigned, "Group") + .addResultFromCtorArg() + .setDocstring("Performs Convolution using a given Input, Filter, and " + "Bias tensors, as well as provided Kernels, Strides, Pads, " + "and Group."); + +BB.newNode("SophonConvolutionWithoutBias") + .addInput("Input") + .addInput("Filter") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Unsigned, "Group") + .addResultFromCtorArg() + .setDocstring("Performs Convolution using a given Input, Filter, " + "as well as provided Kernels, Strides, Pads, " + "and Group."); + +BB.newNode("SophonConvolutionQ8") + .addInput("Input") + .addInput("Filter") + .addInput("Bias") + .addMember(MemberType::VectorUnsigned, "StrideHW") + .addMember(MemberType::VectorUnsigned, "PadTLBR") + .addMember(MemberType::VectorUnsigned, "DilationHW") + .addMember(MemberType::Unsigned, "RShiftWidth") + .addMember(MemberType::Boolean, "EnableRelu") + .addResultFromCtorArg() + .setDocstring("Performs Convolution int8 using a given Input, Filter, and " + "Bias tensors, as well as provided Kernels, Strides, Pads, " + "and Group."); + +BB.newNode("SophonMaxPool") + .addInput("Input") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + 
.addMember(MemberType::Boolean, "RoundMode") + .addResultFromCtorArg() + .setDocstring("Performs a Max Pool operation on the Input given provided " + "Kernels, Strides, and Pads."); + +BB.newNode("SophonMaxPoolQ8") + .addInput("Input") + .addMember(MemberType::VectorUnsigned, "KernelHW") + .addMember(MemberType::VectorUnsigned, "StrideHW") + .addMember(MemberType::VectorUnsigned, "PadTLBR") + .addMember(MemberType::Unsigned, "RShiftWidth") + .addMember(MemberType::Unsigned, "Multiplier") + .addMember(MemberType::Boolean, "RoundMode") + .addResultFromCtorArg() + .setDocstring( + "Performs a Max Pool int8 operation on the Input given provided " + "Kernels, Strides, and Pads."); + +BB.newNode("SophonAvgPool") + .addInput("Input") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::Boolean, "RoundMode") + .addResultFromCtorArg() + .setDocstring("Performs a Avg Pool operation on the Input given provided " + "Kernels, Strides, and Pads."); + +BB.newNode("SophonAvgPoolQ8") + .addInput("Input") + .addMember(MemberType::VectorUnsigned, "KernelHW") + .addMember(MemberType::VectorUnsigned, "StrideHW") + .addMember(MemberType::VectorUnsigned, "PadTLBR") + .addMember(MemberType::Unsigned, "RShiftWidth") + .addMember(MemberType::Unsigned, "Multiplier") + .addMember(MemberType::Boolean, "RoundMode") + .addResultFromCtorArg() + .setDocstring( + "Performs a Avg Pool int8 operation on the Input given provided " + "Kernels, Strides, and Pads."); + +#if 1 // use SophonMatMul instead +BB.newNode("SophonFullyConnected") + .addInput("Input") + .addInput("Weights") + .addInput("Bias") + .addMember(MemberType::Boolean, "Relu") // TODO(wwcai) + .addResultFromCtorArg() + .setDocstring("Creates a FullyConnected node where the Input tensor and " + "Weights tensor are multiplied, and then the Bias tensor " + "is added to it, producing the Output."); +#endif 
+BB.newNode("SophonFullyConnectedQ8") + .addInput("Input") + .addInput("Weights") + .addInput("Bias") + .addMember(MemberType::Boolean, "Relu") + .addMember(MemberType::Unsigned, "RShiftWidth") + .addMember(MemberType::Unsigned, "LShiftWidth") + .addMember(MemberType::Boolean, "ResultAdd") + .addResultFromCtorArg() + .setDocstring( + "Creates a FullyConnected int8 node where the Input tensor and " + "Weights tensor are multiplied, and then the Bias tensor " + "is added to it, producing the Output."); + +BB.newNode("SophonMatMul") + .addInput("LHS") + .addInput("RHS") + .addInput("Slice") + .addResultFromCtorArg() + .setDocstring( + "Performs matrix multiplication between the LHS and RHS, and " + "adds the 'Slice' operand to each one of the slices in the batch. " + "Example: (A, Z) x (Z, B) + (B) => (A, B)"); + +//===--------------------------------------------------------------------===// +// Normalization +//===--------------------------------------------------------------------===// + +BB.newNode("SophonNormalize") + .addInput("Input") + // .addInput("Scale") //!< delete + .addMember(MemberType::Boolean, "AcrossSpatial") + .addMember(MemberType::Boolean, "ChannelShared") + .addMember(MemberType::Float, "Epsilon") + .addMember(MemberType::Float, "Scale") //!< add this + .addResultFromCtorArg() + .setDocstring("Performs normalization on the Input tensor with the " + "provided AcrossSpatial, ChannelShared, Epsilon, and Scale " + "settings (this is not batch normalization). Similar to " + "the Caffe SSD Normalize layer."); + +BB.newNode("SophonBatchNormalization") + .addInput("Input") + // .addInput("Bias") //!< delete + .addInput("Mean") + .addInput("Variance") + .addMember(MemberType::Float, "Scale") + .addMember(MemberType::Float, "Epsilon") + .addResultFromCtorArg() + .setDocstring("Performs batch normalization on the Input tensor with the " + "provided Scale, Bias, Mean, Var, ChannelIdx, Epsilon, and " + "Momentum. 
Similar to Caffe2 SpatialBN, and ONNX " + "BatchNormalization operator."); + +BB.newNode("SophonBatchNormalizationOpt") + .addInput("Input") + .addInput("Mean") + .addInput("Variance") + .addMember(MemberType::Float, "Scale") + .addMember(MemberType::Float, "Epsilon") + .addResultFromCtorArg() + .setDocstring("Performs batch normalization on the Input tensor with the " + "provided Scale, Bias, Mean, Var, ChannelIdx, Epsilon, and " + "Momentum. Similar to Caffe2 SpatialBN, and ONNX " + "BatchNormalization operator."); + +BB.newNode("SophonLocalResponseNormalization") + .addInput("Input") + .addMember(MemberType::Float, "Alpha") + .addMember(MemberType::Float, "Beta") + //.addMember(MemberType::Float, "bias") caffe doesn't have this parameter, + // comment it + .addMember(MemberType::Unsigned, "NormRegion") + .addMember(MemberType::Unsigned, "Size") + .addMember(MemberType::Float, "K") + .addResultFromCtorArg() + .setDocstring("Performs local response normalization on the Input tensor " + "with the provided Scale, Bias, Mean, Var, ChannelIdx, " + "Epsilon, and Momentum. 
Similar to Caffe2 and ONNX LRN."); + +//===--------------------------------------------------------------------===// +// Activation +//===--------------------------------------------------------------------===// +BB.newNode("SophonRelu") + .addInput("Input") + .addMember(MemberType::Float, "NegativeSlope") + .addResultFromCtorArg() + .setDocstring( + "Applies ReLU, max(0, x), to each element in the Input tensor."); + +BB.newNode("SophonReluQ8") + .addInput("Input") + .addResultFromCtorArg() + .setDocstring( + "Applies ReLU, max(0, x), to each element in the Input int8 tensor."); + +BB.newNode("SophonSigmoid") + .addInput("Input") + .addResultFromCtorArg() + .setDocstring("Applies Sigmoid, 1 / (1 + exp(-x)), to each element in " + "the Input tensor."); + +BB.newNode("SophonTanh") + .addInput("Input") + .addResultFromCtorArg() + .setDocstring("Applies hyperbolic tangent to each element in the Input " + "tensor."); + +BB.newNode("SophonPrelu") + .addInput("Input") + .addInput("Slope") + .addMember(MemberType::Boolean, "ChannelShared") + .addResultFromCtorArg() + .setDocstring("PRelu takes input data (Tensor) and slope tensor as input, " + "and produces one output data (Tensor) where the function " + "f(x) = slope * x for x < 0, f(x) = x for x >= 0, " + "is applied to the data tensor elementwise."); + +//===--------------------------------------------------------------------===// +// Other NN operations +//===--------------------------------------------------------------------===// + +BB.newNode("SophonSoftMax") + .addInput("Input") + .addMember(MemberType::Unsigned, "Axis") + .addResultFromCtorArg() + .setDocstring("Performs SoftMax normalization on the Input tensor."); + +BB.newNode("SophonPriorbox") + .addInput("Input") + .addInput("Weight") + // .addMember(MemberType::VectorFloat, "MinSize") + // .addMember(MemberType::VectorFloat, "MaxSize") + // .addMember(MemberType::VectorFloat, "AspectRatio") + // .addMember(MemberType::VectorFloat, "Variance") + 
.addMember(MemberType::Unsigned, "NumPriors") + .addMember(MemberType::Unsigned, "ImgH") + .addMember(MemberType::Unsigned, "ImgW") + .addMember(MemberType::Float, "StepH") + .addMember(MemberType::Float, "StepW") + .addMember(MemberType::Unsigned, "Clip") + .addMember(MemberType::Unsigned, "Offset") + .addMember(MemberType::Unsigned, "ReducebBoxes") + .addResultFromCtorArg() + .setDocstring("PriorBox takes input data, and produces prior box of " + "featuremap. Similar to SSD PriorBox."); + +BB.newNode("SophonUpsample") + .addInput("Input") + .addMember(MemberType::Unsigned, "Size") + .addResultFromCtorArg() + .setDocstring("Upsample the input tensor. Each dimension value of the " + "output tensor is: output_dimension = size."); + +BB.newNode("SophonDeconvolution") + .addInput("Input") + .addInput("Filter") + .addInput("Bias") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Unsigned, "Group") + .addResultFromCtorArg() + .setDocstring( + "Performs Deconvolution using a given Input, Filter, and " + "Bias tensors, as well as provided Kernels, Strides, Pads, and Group."); + +BB.newNode("SophonDeconvolutionWithoutBias") + .addInput("Input") + .addInput("Filter") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Unsigned, "Group") + .addResultFromCtorArg() + .setDocstring( + "Performs Deconvolution using a given Input and Filter, " + "as well as provided Kernels, Strides, Pads, and Group."); + +BB.newNode("SophonDeconvolutionOpt") + .addInput("Input") + .addInput("Filter") + .addInput("Bias") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + 
.addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Unsigned, "Group") + .addResultFromCtorArg() + .setDocstring("Performs Deconvolution using a given Input, Filter, and Bias, " + "as well as provided Kernels, Strides, Pads, and Group."); + +BB.newNode("SophonDeconvolutionWithoutBiasOpt") + .addInput("Input") + .addInput("Filter") + .addMember(MemberType::VectorUnsigned, "Kernels") + .addMember(MemberType::VectorUnsigned, "Strides") + .addMember(MemberType::VectorUnsigned, "Pads") + .addMember(MemberType::VectorUnsigned, "Dilations") + .addMember(MemberType::Unsigned, "Group") + .addResultFromCtorArg() + .setDocstring("Performs Deconvolution using a given Input and Filter, " + "as well as provided Kernels, Strides, Pads, and Group."); + +BB.newNode("SophonROIPool") + .addInput("Input") + .addInput("Rois") + .addMember(MemberType::VectorUnsigned, "PoolShape") + .addMember(MemberType::Float, "SpatialScale") + .addResultFromCtorArg() + .setDocstring( + "ROI pool consumes an input tensor X and region of interests " + "(RoIs) to apply pooling across each RoI, to produce output 4-D tensor " + "of shape (num_rois, channels, pooled_shape[0], pooled_shape[1]). " + "Similar " + "to Faster-RCNN ROIPooling."); + +BB.newNode("SophonPSROIPool") + .addInput("Input") + .addMember(MemberType::Float, "SpatialScale") + .addMember(MemberType::Unsigned, "Group") + .addResultFromCtorArg() + .setDocstring( + "Position sensitive ROI pooling. 
Similar to RFCN PSROIPooling."); + +BB.newNode("SophonMultiRegion") + .addInput("Input") + .addMember(MemberType::Unsigned, "Classes") + .addMember(MemberType::Unsigned, "Coords") + .addMember(MemberType::Unsigned, "Nums") + .addMember(MemberType::VectorUnsigned, "ActivateParameters") + .addResultFromCtorArg() + .setDocstring("Similar to darknet multiregion."); + +BB.newNode("SophonLSTM") + .addInput("Input") + .addInput("W") + .addInput("R") + .addInput("B") + .addInput("P") + .addMember(MemberType::Unsigned, "time_num") + .addMember(MemberType::Unsigned, "with_x_static") + .addMember(MemberType::Unsigned, "expose_hidden") + .addResultFromCtorArg() + .setDocstring("Computes an one-layer LSTM."); + +BB.newNode("SophonShuffleChannel") + .addInput("Input") + .addMember(MemberType::Unsigned, "Group") + .addResultFromCtorArg() + .setDocstring("Shuffle the channels of input tensor based on the group."); + +BB.newNode("SophonSlice") + .addInput("Input") + .addMember(MemberType::Unsigned, "axis") + .addResultFromCtorArg() + .setDocstring("Slice."); + +//===--------------------------------------------------------------------===// +// Shape transformations +//===--------------------------------------------------------------------===// + +BB.newNode("SophonConcat") + .addMember(MemberType::VectorNodeValue, "Inputs") + .addMember(MemberType::Unsigned, "Dim") + .addResultFromCtorArg() + .setDocstring("The concat operator adds two tensors together.\nThe " + "parameter 'dim' specifies the dimension to use when " + "joining the tensors."); + +// FIXME +BB.newNode("SophonConcat2") + .addInput("Input0") + .addInput("Input1") + .addMember(MemberType::Unsigned, "Dim") + .addResultFromCtorArg() + .setDocstring("The concat operator adds two tensors together.\nThe " + "parameter 'dim' specifies the dimension to use when " + "joining the tensors."); + +BB.newNode("SophonConcat3") + .addInput("Input0") + .addInput("Input1") + .addInput("Input2") + .addMember(MemberType::Unsigned, "Dim") + 
.addResultFromCtorArg() + .setDocstring("The concat operator adds two tensors together.\nThe " + "parameter 'dim' specifies the dimension to use when " + "joining the tensors."); + +BB.newNode("SophonConcat4") + .addInput("Input0") + .addInput("Input1") + .addInput("Input2") + .addInput("Input3") + .addMember(MemberType::Unsigned, "Dim") + .addResultFromCtorArg() + .setDocstring("The concat operator adds two tensors together.\nThe " + "parameter 'dim' specifies the dimension to use when " + "joining the tensors."); + +BB.newNode("SophonConcat5") + .addInput("Input0") + .addInput("Input1") + .addInput("Input2") + .addInput("Input3") + .addInput("Input4") + .addMember(MemberType::Unsigned, "Dim") + .addResultFromCtorArg() + .setDocstring("The concat operator adds two tensors together.\nThe " + "parameter 'dim' specifies the dimension to use when " + "joining the tensors."); + +BB.newNode("SophonConcat6") + .addInput("Input0") + .addInput("Input1") + .addInput("Input2") + .addInput("Input3") + .addInput("Input4") + .addInput("Input5") + .addMember(MemberType::Unsigned, "Dim") + .addResultFromCtorArg() + .setDocstring("The concat operator adds two tensors together.\nThe " + "parameter 'dim' specifies the dimension to use when " + "joining the tensors."); + +BB.newNode("SophonReshape") + .addInput("Input") + .addMember(MemberType::VectorSizeT, "Dims") + .addResultFromCtorArg() + .setDocstring("Reshape the Input tensor to shape Dims."); + +BB.newNode("SophonTranspose") + .addInput("Input") + .addMember(MemberType::VectorUnsigned, "Shuffle") + .addResultFromCtorArg() + .setDocstring("Transpose the Input tensor based on the vector Shuffle, " + "which assigns a new axis for each dimension in Input."); + +BB.newNode("SophonFlatten") + .addInput("Input") + .addResultFromCtorArg() + .setDocstring("Flattens the input tensor into a 2D matrix."); + +BB.newNode("SophonReorg") + .addInput("Input") + .addMember(MemberType::Unsigned, "Stride") + .addResultFromCtorArg() + 
.setDocstring("The reorganization layer takes every alternate pixel and " + "puts that into a different channel."); + +BB.newNode("SophonCrop") + .addInput("Input") + .addMember(MemberType::VectorUnsigned, "Offsets") + .addResultFromCtorArg() + .setDocstring("Crop the Input tensor by Offsets."); + +BB.newNode("SophonPermute") + .addInput("Input") + .addMember(MemberType::VectorUnsigned, "Order") + .addResultFromCtorArg() + .setDocstring("Replace index axis order."); + +BB.newNode("SophonDummyData") + .addInput("Input") + .addResultFromCtorArg() + .setDocstring("Dummy Data."); + +BB.newNode("SophonEltwise") + .addInput("LHS") + .addInput("RHS") + //.addMember(MemberType::VectorFloat, "Coeff") + .addMember(MemberType::Unsigned, "Operation") + .addMember(MemberType::Boolean, "StableProdGrad") + .addResultFromCtorArg() + .setDocstring( + "The eltwise operator adds/product/max two tensors together."); + +BB.newNode("SophonTile") + .addInput("Input") + // .addMember(MemberType::VectorFloat, "coeff") + .addMember(MemberType::Unsigned, "Axis") + .addMember(MemberType::Unsigned, "Tiles") + .addResultFromCtorArg() + .setDocstring( + "The tile operator repeats the Input tensor Tiles times along Axis."); + +//===--------------------------------------------------------------------===// +// Arithmetic +//===--------------------------------------------------------------------===// + +BB.newNode("SophonScale") + .addInput("Input") + .addInput("Scale") + .addInput("Bias") + .addMember(MemberType::Unsigned, "Axis") + .addMember(MemberType::Unsigned, "NumAxes") + .addResultFromCtorArg() + .setDocstring("Scale input tensor."); + +BB.newNode("SophonScaleWithoutBias") + .addInput("Input") + .addInput("Scale") + .addMember(MemberType::Unsigned, "Axis") + .addMember(MemberType::Unsigned, "NumAxes") + .addResultFromCtorArg() + .setDocstring("Scale input tensor."); + +BB.newNode("SophonScale1") + .addInput("Input") + .addInput("Bias") + .addMember(MemberType::Unsigned, "Axis") +
.addMember(MemberType::Unsigned, "NumAxes") + .addResultFromCtorArg() + .setDocstring("Scale input tensor."); + +BB.newNode("SophonMul") + .addInput("LHS") + .addInput("RHS") + .addResultFromCtorArg() + .setDocstring("Performs Mul on the LHS and RHS operands."); + +BB.newNode("SophonAdd") + .addInput("LHS") + .addInput("RHS") + .addResultFromCtorArg() + .setDocstring("Performs Add on the LHS and RHS operands."); + +BB.newNode("SophonMax") + .addInput("LHS") + .addInput("RHS") + .addResultFromCtorArg() + .setDocstring("Performs Max on the LHS and RHS operands."); + +BB.newNode("SophonPow") + .addInput("Input") + .addMember(MemberType::Float, "Power") + .addMember(MemberType::Float, "Scale") + .addMember(MemberType::Float, "Shift") + .addResultFromCtorArg() + .setDocstring("Performs elementwise power of Input (Power, Scale, Shift)."); + +BB.newNode("SophonSub") + .addInput("LHS") + .addInput("RHS") + .addResultFromCtorArg() + .setDocstring("Performs Sub on the LHS and RHS operands."); + +BB.newNode("SophonDiv") + .addInput("LHS") + .addInput("RHS") + .addResultFromCtorArg() + .setDocstring("Performs Div on the LHS and RHS operands."); + +//===--------------------------------------------------------------------===// +// Others +//===--------------------------------------------------------------------===// +BB.newNode("SophonAbs") + .addInput("Input") + .addResultFromCtorArg() + .setDocstring("Applies Abs."); + +BB.newNode("SophonYolo") + .addInput("Input") + .addMember( + MemberType::Unsigned, + "classes") // by ycs: in bmnet-caffe.proto, it's int32? not unsigned? + .addResultFromCtorArg() + .setDocstring("yolo."); + +BB.newNode("SophonRegion") + .addInput("Input") + .addMember( + MemberType::Unsigned, + "classes") // by ycs: in bmnet-caffe.proto, it's int32? not unsigned?
+ .addResultFromCtorArg() + .setDocstring("Region."); + +BB.newNode("SophonProposal") + .addInput("Input") + .addMember(MemberType::Unsigned, "feat_stride") + .addMember(MemberType::Unsigned, "pre_nms_topN") + .addMember(MemberType::Unsigned, "post_nms_topN") + .addMember(MemberType::Float, "nms_thresh") + .addMember(MemberType::Unsigned, "min_size") + .addMember(MemberType::Unsigned, "base_size") + .addMember(MemberType::Unsigned, "version") + // .addMember(MemberType::VectorFloat, "scale") by ycs: can't use + // vectorfloat yet, need fix .addMember(MemberType::VectorFloat, "ratio") + .addResultFromCtorArg() + .setDocstring("Proposal."); + +/****************** +BB.newNode("SophonSlice") + .addInput("Input") + .addMember(MemberType::Unsigned, "Axis") + .addResultFromCtorArg() + .addResultFromCtorArg() + .setDocstring( + "Applies slice."); +******************/ + +BB.newNode("SophonReduction") + .addInput("Input") + .addMember(MemberType::Unsigned, "Axis") + .addMember(MemberType::Unsigned, "Operation") + .addMember(MemberType::Float, "Coeff") + .addResultFromCtorArg() + .setDocstring("Applies the reduction selected by Operation."); +BB.newNode("SophonInterp") + .addInput("Input") + .addInput("Weights") + .addResultFromCtorArg() + .setDocstring("Interp."); + +BB.includeBackendSpecificVerification("SophonSpecificNodesVerification.h"); diff --git a/tools/ClassGen/Backends/Sophon/SophonSpecificNodesVerification.h b/tools/ClassGen/Backends/Sophon/SophonSpecificNodesVerification.h new file mode 100644 index 0000000000..caae4709cc --- /dev/null +++ b/tools/ClassGen/Backends/Sophon/SophonSpecificNodesVerification.h @@ -0,0 +1,92 @@ +/** + * Copyright (c) 2017-present, Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +void SophonConvolutionNode::verify() const { + // TODO: just demo + ShapeNCHW idim(getInput().getType()->dims()); + ShapeNCHW odim(getResult().getType()->dims()); + auto outSz = calculateConvPoolOutputDims(idim.h, idim.w, getKernels(), + getStrides(), getPads()); + ShapeNCHW exp(idim.n, getBias().dims()[0], outSz.first, outSz.second); + (void)exp; + assert(exp == odim && "Invalid output dimensions"); +} + +void SophonConvolutionWithoutBiasNode::verify() const {} + +void SophonMaxPoolNode::verify() const {} +void SophonAvgPoolNode::verify() const {} +void SophonFullyConnectedNode::verify() const {} +void SophonMatMulNode::verify() const {} + +void SophonNormalizeNode::verify() const {} +void SophonBatchNormalizationNode::verify() const {} +void SophonBatchNormalizationOptNode::verify() const {} +void SophonLocalResponseNormalizationNode::verify() const {} + +void SophonReluNode::verify() const {} +void SophonTanhNode::verify() const {} +void SophonSigmoidNode::verify() const {} +void SophonPreluNode::verify() const {} + +void SophonSoftMaxNode::verify() const {} +void SophonPriorboxNode::verify() const { assert(0); } +void SophonUpsampleNode::verify() const {} +void SophonDeconvolutionNode::verify() const {} +void SophonDeconvolutionWithoutBiasNode::verify() const {} +void SophonDeconvolutionOptNode::verify() const {} +void SophonDeconvolutionWithoutBiasOptNode::verify() const {} +void SophonROIPoolNode::verify() const {} +void SophonPSROIPoolNode::verify() const {} +void SophonMultiRegionNode::verify() const {} +void 
SophonLSTMNode::verify() const {} +void SophonShuffleChannelNode::verify() const {} +void SophonSliceNode::verify() const {} + +void SophonConcatNode::verify() const {} +void SophonConcat2Node::verify() const {} +void SophonConcat3Node::verify() const {} +void SophonConcat4Node::verify() const {} +void SophonConcat5Node::verify() const {} +void SophonConcat6Node::verify() const {} +void SophonReshapeNode::verify() const {} +void SophonTransposeNode::verify() const {} +void SophonFlattenNode::verify() const {} +void SophonCropNode::verify() const {} +void SophonReorgNode::verify() const {} +void SophonPermuteNode::verify() const {} +void SophonDummyDataNode::verify() const {} +void SophonEltwiseNode::verify() const {} +void SophonTileNode::verify() const {} + +void SophonScaleNode::verify() const {} +void SophonScaleWithoutBiasNode::verify() const {} +void SophonScale1Node::verify() const {} +void SophonMulNode::verify() const {} +void SophonAddNode::verify() const {} +void SophonMaxNode::verify() const {} +void SophonPowNode::verify() const {} +void SophonAbsNode::verify() const {} +void SophonSubNode::verify() const {} +void SophonDivNode::verify() const {} + +void SophonProposalNode::verify() const { assert(0); } +void SophonRegionNode::verify() const {} +void SophonYoloNode::verify() const {} +void SophonInterpNode::verify() const {} + +// void SophonSliceNode::verify() const {} +void SophonReductionNode::verify() const {} diff --git a/tools/ClassGen/CMakeLists.txt b/tools/ClassGen/CMakeLists.txt index 76d93ab4c5..fbd4dfbd67 100644 --- a/tools/ClassGen/CMakeLists.txt +++ b/tools/ClassGen/CMakeLists.txt @@ -26,3 +26,7 @@ endif() if(GLOW_WITH_OPENCL) add_subdirectory(Backends/OpenCL) endif() + +if(GLOW_WITH_SOPHON) + add_subdirectory(Backends/Sophon) +endif() diff --git a/tools/ClassGen/InstrGen.cpp b/tools/ClassGen/InstrGen.cpp index 3dea553762..b0dd856ee3 100644 --- a/tools/ClassGen/InstrGen.cpp +++ b/tools/ClassGen/InstrGen.cpp @@ -535,6 +535,7 @@ int main(int 
argc, char **argv) { #include "Backends/CPU/CPUSpecificInstrs.h" #include "Backends/OpenCL/OpenCLSpecificInstrs.h" +#include "Backends/Sophon/SophonSpecificInstrs.h" return 0; } diff --git a/tools/ClassGen/NodeGen.cpp b/tools/ClassGen/NodeGen.cpp index 1276487e17..d447ef7163 100644 --- a/tools/ClassGen/NodeGen.cpp +++ b/tools/ClassGen/NodeGen.cpp @@ -597,6 +597,7 @@ int main(int argc, char **argv) { #include "Backends/CPU/CPUSpecificNodes.h" #include "Backends/OpenCL/OpenCLSpecificNodes.h" +#include "Backends/Sophon/SophonSpecificNodes.h" return 0; } From ef90af084fa9776b38490491395526c225e32b23 Mon Sep 17 00:00:00 2001 From: Eric Yu Date: Wed, 26 Dec 2018 16:33:29 +0000 Subject: [PATCH 2/7] [DOCKER] add Dockerfile for sophon new file: utils/docker/Dockerfile.sophon new file: utils/docker/Dockerfile.sophon.dev --- utils/docker/Dockerfile.sophon | 8 ++++++++ utils/docker/Dockerfile.sophon.dev | 20 ++++++++++++++++++++ utils/docker/build-sophon-dev.sh | 8 ++++++++ utils/docker/build-sophon.sh | 8 ++++++++ 4 files changed, 44 insertions(+) create mode 100644 utils/docker/Dockerfile.sophon create mode 100644 utils/docker/Dockerfile.sophon.dev create mode 100755 utils/docker/build-sophon-dev.sh create mode 100755 utils/docker/build-sophon.sh diff --git a/utils/docker/Dockerfile.sophon b/utils/docker/Dockerfile.sophon new file mode 100644 index 0000000000..137b576e32 --- /dev/null +++ b/utils/docker/Dockerfile.sophon @@ -0,0 +1,8 @@ +FROM pytorch/glow:0.1 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + openssh-client \ + ca-certificates \ + libgoogle-glog-dev + diff --git a/utils/docker/Dockerfile.sophon.dev b/utils/docker/Dockerfile.sophon.dev new file mode 100644 index 0000000000..c246c37c09 --- /dev/null +++ b/utils/docker/Dockerfile.sophon.dev @@ -0,0 +1,20 @@ +FROM pytorch/glow/sophon:0.1 + +# cmake +RUN apt-get update && apt-get install -y --no-install-recommends \ + libncurses5-dev +WORKDIR /opt +RUN wget --no-check-certificate 
http://www.cmake.org/files/v3.12/cmake-3.12.1.tar.gz && \ + tar -xvzf cmake-3.12.1.tar.gz && \ + cd cmake-3.12.1/ && \ + ./bootstrap && \ + make -j4 && \ + make install + +RUN apt-get update && apt-get install -y --no-install-recommends \ + clang-format-6.0 \ + clang-tidy-6.0 +RUN ln -s /usr/bin/clang-format-6.0 /usr/bin/clang-format +RUN ln -s /usr/bin/clang-tidy-6.0 /usr/bin/clang-tidy + + diff --git a/utils/docker/build-sophon-dev.sh b/utils/docker/build-sophon-dev.sh new file mode 100755 index 0000000000..9219709231 --- /dev/null +++ b/utils/docker/build-sophon-dev.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -exu +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd "$SCRIPT_DIR" + +./build-sophon.sh +docker build -f Dockerfile.sophon.dev -t pytorch/glow/sophon/dev:0.1 . diff --git a/utils/docker/build-sophon.sh b/utils/docker/build-sophon.sh new file mode 100755 index 0000000000..f4bc4eb115 --- /dev/null +++ b/utils/docker/build-sophon.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -exu +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd "$SCRIPT_DIR" + +./build.sh +docker build -f Dockerfile.sophon -t pytorch/glow/sophon:0.1 . From f136d1415ec86cdff2843ac9dbb03c43f6b3329f Mon Sep 17 00:00:00 2001 From: zakk Date: Thu, 27 Dec 2018 00:39:12 +0800 Subject: [PATCH 3/7] [Sophon] default disable Sophon backend 1. default disable Sophon backend s. 
update library path --- CMakeLists.txt | 2 +- lib/Backends/Sophon/CMakeLists.txt | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ac2f991588..eb8efb76b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ enable_testing() option(GLOW_WITH_CPU "Build the LLVM-based JIT CPU backend" ON) option(GLOW_WITH_OPENCL "Build the OpenCL backend" ON) -option(GLOW_WITH_SOPHON "Build the Sophon backend" ON) +option(GLOW_WITH_SOPHON "Build the Sophon backend" OFF) option(GLOW_BUILD_EXAMPLES "Build the examples" ON) option(GLOW_BUILD_TESTS "Build the tests" ON) option(GLOW_WITH_BUNDLES "Build bundles" OFF) diff --git a/lib/Backends/Sophon/CMakeLists.txt b/lib/Backends/Sophon/CMakeLists.txt index fd348ea671..8efdce9777 100644 --- a/lib/Backends/Sophon/CMakeLists.txt +++ b/lib/Backends/Sophon/CMakeLists.txt @@ -30,9 +30,9 @@ target_link_libraries(Sophon PRIVATE CodeGen Importer Quantization - ${Sophon_ext_lib}/lib/cmodel/libbmkernel-static.a - ${Sophon_ext_lib}/lib/cmodel/libbmodel.so - ${Sophon_ext_lib}/lib/cmodel/libbmruntime.so) + ${Sophon_ext_lib}/bmnet_sdk/install/lib/libbmkernel-static.a + ${Sophon_ext_lib}/bmnet_sdk/install/lib/libbmodel.so + ${Sophon_ext_lib}/bmnet_sdk/install/lib/libbmruntime.so) set(Sophon_base ${Glow_SOURCE_DIR}/lib/Backends) function(add_libbmnet_glow) From dabaf0430c246fa821f6be94c359f8f8a9bd0105 Mon Sep 17 00:00:00 2001 From: Eric Yu Date: Thu, 27 Dec 2018 07:04:16 +0000 Subject: [PATCH 4/7] [CMake] add AutoGen target for fix build dependencies --- lib/Backends/Sophon/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Backends/Sophon/CMakeLists.txt b/lib/Backends/Sophon/CMakeLists.txt index 8efdce9777..1e5a98b71c 100644 --- a/lib/Backends/Sophon/CMakeLists.txt +++ b/lib/Backends/Sophon/CMakeLists.txt @@ -41,7 +41,7 @@ function(add_libbmnet_glow) string(REPLACE "-" "_" target_name ${target_name}) set(target_name ${target_name}_obj) 
add_library(${target_name} OBJECT ${ARGN}) - add_dependencies(${target_name} sophon_sdk) + add_dependencies(${target_name} sophon_sdk AutoGen) target_sources(Sophon PRIVATE $) endfunction() From d4242b494d0729cce55e5923bf366915c85b3724 Mon Sep 17 00:00:00 2001 From: Eric Yu Date: Thu, 27 Dec 2018 08:20:22 +0000 Subject: [PATCH 5/7] api change --- .../Sophon/BM188x/BM1880AllocationsInfo.cpp | 6 +- .../Sophon/BM188x/BM1880AllocationsInfo.h | 4 +- lib/Backends/Sophon/BM188x/BM1880Backend.cpp | 13 +- lib/Backends/Sophon/BM188x/BM1880Backend.h | 5 +- lib/Backends/Sophon/SophonFunction.cpp | 44 ++++--- lib/Backends/Sophon/SophonFunction.h | 18 ++- .../Sophon/SophonSpecificInstrsVerification.h | 41 +++--- .../Sophon/SophonSpecificNodesVerification.h | 117 +++++++++--------- 8 files changed, 134 insertions(+), 114 deletions(-) diff --git a/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.cpp b/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.cpp index b490c68aba..d5805729b8 100644 --- a/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.cpp +++ b/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.cpp @@ -17,11 +17,11 @@ using llvm::cast; using llvm::dyn_cast; using llvm::isa; -BM1880AllocationsInfo::BM1880AllocationsInfo() : ctx_(nullptr), TTI_(nullptr) {} +BM1880AllocationsInfo::BM1880AllocationsInfo() : TTI_(nullptr) {} BM1880AllocationsInfo::BM1880AllocationsInfo( - const Context &ctx, const sophon::SophonTargetTransformInfo *TTI) - : ctx_(&ctx), TTI_(TTI) {} + const sophon::SophonTargetTransformInfo *TTI) + : TTI_(TTI) {} void BM1880AllocationsInfo::allocateWeightVars(const IRFunction *F) { diff --git a/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.h b/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.h index 276b7f9533..c1d4ce49c2 100644 --- a/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.h +++ b/lib/Backends/Sophon/BM188x/BM1880AllocationsInfo.h @@ -27,13 +27,11 @@ class BM1880AllocationsInfo : public AllocationsInfo { /// Amount of memory to be allocated for 
activations. size_t local_memory_sizes_{0}; - const Context *ctx_; const sophon::SophonTargetTransformInfo *TTI_; public: BM1880AllocationsInfo(); - BM1880AllocationsInfo(const Context &ctx, - const sophon::SophonTargetTransformInfo *TTI); + BM1880AllocationsInfo(const sophon::SophonTargetTransformInfo *TTI); void allocateWeightVars(const IRFunction *F) override; void allocateActivations(const IRFunction *F) override; void allocateTensorViews(const IRFunction *F) override { diff --git a/lib/Backends/Sophon/BM188x/BM1880Backend.cpp b/lib/Backends/Sophon/BM188x/BM1880Backend.cpp index 487ec37265..bebc4e7e69 100644 --- a/lib/Backends/Sophon/BM188x/BM1880Backend.cpp +++ b/lib/Backends/Sophon/BM188x/BM1880Backend.cpp @@ -51,24 +51,23 @@ BM1880Backend::codegen(std::unique_ptr IR, } std::unique_ptr -BM1880Backend::compileIR(std::unique_ptr IR, - const Context &ctx) const { - BM1880AllocationsInfo allocationsInfo(ctx, getTTI()); +BM1880Backend::compileIR(std::unique_ptr IR + ) const { + BM1880AllocationsInfo allocationsInfo(getTTI()); runOptimizationPasses(IR.get(), &allocationsInfo); return codegen(std::move(IR), &allocationsInfo); } std::unique_ptr -BM1880Backend::compile(Function *F, const Context &ctx) const { +BM1880Backend::compile(Function *F) const { auto IR = generateAndOptimizeIR(F, true /*shouldShareBuffers*/); - return compileIR(std::move(IR), ctx); + return compileIR(std::move(IR)); } void BM1880Backend::save(Function *F, llvm::StringRef outputDir, llvm::StringRef networkName) const { auto IR = generateAndOptimizeIR(F, true /*shouldShareBuffers*/); - Context ctx; - BM1880AllocationsInfo allocationsInfo(ctx, getTTI()); + BM1880AllocationsInfo allocationsInfo(getTTI()); runOptimizationPasses(IR.get(), &allocationsInfo); auto b = Bundle(this, allocationsInfo); auto model = b.codegen(IR.get()); diff --git a/lib/Backends/Sophon/BM188x/BM1880Backend.h b/lib/Backends/Sophon/BM188x/BM1880Backend.h index a37e848df2..39bd515427 100644 --- 
a/lib/Backends/Sophon/BM188x/BM1880Backend.h +++ b/lib/Backends/Sophon/BM188x/BM1880Backend.h @@ -34,7 +34,7 @@ class BM1880Backend final : public SophonBackend { ~BM1880Backend() override = default; std::unique_ptr - compileIR(std::unique_ptr IR, const Context &ctx) const override; + compileIR(std::unique_ptr IR) const override; std::unique_ptr codegen(std::unique_ptr IR, AllocationsInfo *allocationsInfo) const; @@ -43,8 +43,7 @@ class BM1880Backend final : public SophonBackend { BM1880AllocationsInfo *allocationsInfo) const; /// JIT Mode: compile to FunctionCompiled - std::unique_ptr compile(Function *F, - const Context &ctx) const override; + std::unique_ptr compile(Function *F) const override; /// AOT Mode: save to bmodel void save(Function *F, llvm::StringRef outputDir, diff --git a/lib/Backends/Sophon/SophonFunction.cpp b/lib/Backends/Sophon/SophonFunction.cpp index 3adb5ebd01..364504ceab 100644 --- a/lib/Backends/Sophon/SophonFunction.cpp +++ b/lib/Backends/Sophon/SophonFunction.cpp @@ -15,30 +15,19 @@ #include "glow/Graph/Context.h" #include "glow/Support/Debug.h" #include "llvm/Support/Debug.h" - #include -#include -#include namespace glow { SophonFunction::SophonFunction(std::unique_ptr model) { model_ = std::move(model); -} - -SophonFunction::~SophonFunction() {} - -void SophonFunction::execute(Context &ctx) { DEBUG_GLOW(bmodel::print(*model_)); - bmctx_t bmctx; bmerr_t ret; ret = bm_init(0, &bmctx); if (ret != BM_SUCCESS) { llvm_unreachable("bm_init failed"); } - bmnet_t net; - bmnet_output_info_t output_info; auto bmodel_filename = []() { char temp[] = "/tmp/glow-ut-temp.XXXXXX"; @@ -64,9 +53,28 @@ void SophonFunction::execute(Context &ctx) { if (ret != BM_SUCCESS) { llvm_unreachable("get output failed!"); } +} + +SophonFunction::~SophonFunction() { + bmnet_cleanup(net); + bm_exit(bmctx); +} + +void SophonFunction::execute() { + // run cmdbuf + bmerr_t ret; + ret = bmnet_run(net); + if (ret != BM_SUCCESS) { + llvm_unreachable("run failed!"); + } +} 
+void SophonFunction::setupRuns() {} + +void SophonFunction::beforeRun(const Context &ctx) { // TODO support multiple inputs std::vector input; + bmerr_t ret; for (auto PH : ctx.pairs()) { // input if fail to find "save_" prefix @@ -83,24 +91,18 @@ void SophonFunction::execute(Context &ctx) { if (ret != BM_SUCCESS) { llvm_unreachable("load input failed!"); } +} - // run cmdbuf - ret = bmnet_run(net); - if (ret != BM_SUCCESS) { - llvm_unreachable("run failed!"); - } - +void SophonFunction::afterRun(const Context &ctx) { size_t output_size = output_info.output_size; std::vector output(output_size); + bmerr_t ret; // download output data ret = bmnet_store_output(net, output.data()); if (ret != BM_SUCCESS) { llvm_unreachable("store output failed!"); } - bmnet_cleanup(net); - bm_exit(bmctx); - // TODO support multiple outputs for (auto PH : ctx.pairs()) { // Sophon Backend uses "save_" prefix to recognize output @@ -111,4 +113,6 @@ void SophonFunction::execute(Context &ctx) { } } +void SophonFunction::tearDownRuns() {} + } // namespace glow diff --git a/lib/Backends/Sophon/SophonFunction.h b/lib/Backends/Sophon/SophonFunction.h index d6e92fff7a..54b6c6d70b 100644 --- a/lib/Backends/Sophon/SophonFunction.h +++ b/lib/Backends/Sophon/SophonFunction.h @@ -11,8 +11,9 @@ #define _SophonFUNCTION_H #include "glow/Backends/CompiledFunction.h" - #include +#include +#include #include namespace glow { @@ -28,10 +29,23 @@ class SophonFunction final : public CompiledFunction { ///@{ ~SophonFunction() override; - void execute(Context &ctx) override; + /// Allocate Mutable buffers on device this includes Activations and + /// Placeholders. + void setupRuns() override; + /// Copy Input Placeholder data to position. + void beforeRun(const Context &ctx) override; + /// Copy Outputs to Placeholders in \p ctx. + void afterRun(const Context &ctx) override; + /// Final cleanup, free all allocations. 
+ void tearDownRuns() override; + + void execute() override; private: std::unique_ptr model_; + bmnet_t net; + bmctx_t bmctx; + bmnet_output_info_t output_info; }; } // namespace glow diff --git a/tools/ClassGen/Backends/Sophon/SophonSpecificInstrsVerification.h b/tools/ClassGen/Backends/Sophon/SophonSpecificInstrsVerification.h index 3696520908..19c462024e 100644 --- a/tools/ClassGen/Backends/Sophon/SophonSpecificInstrsVerification.h +++ b/tools/ClassGen/Backends/Sophon/SophonSpecificInstrsVerification.h @@ -14,24 +14,29 @@ * limitations under the License. */ -void SophonConvolutionQ8Node::verify() const { +bool SophonConvolutionQ8Node::verify() const { // TBD + return true; } -void SophonFullyConnectedQ8Node::verify() const { +bool SophonFullyConnectedQ8Node::verify() const { // TBD + return true; } -void SophonReluQ8Node::verify() const { +bool SophonReluQ8Node::verify() const { // TBD + return true; } -void SophonMaxPoolQ8Node::verify() const { +bool SophonMaxPoolQ8Node::verify() const { // TBD + return true; } -void SophonAvgPoolQ8Node::verify() const { +bool SophonAvgPoolQ8Node::verify() const { // TBD + return true; } void SophonLoadInst::verify() const { @@ -43,21 +48,21 @@ void SophonStoreInst::verify() const { } #if 0 -void SophonFakeParallelStartInst::verify() const {} -void SophonFakeParallelEndInst::verify() const {} +bool SophonFakeParallelStartInst::verify() const {} +bool SophonFakeParallelEndInst::verify() const {} -void TL_AllocInst::verify() const {} -void SophonDeallocLocalTensorInst::verify() const {} +bool TL_AllocInst::verify() const {} +bool SophonDeallocLocalTensorInst::verify() const {} -void SophonLoadStrideInst::verify() const {} -void SophonLoadInst::verify() const {} -void SophonStoreStrideInst::verify() const {} -void SophonStoreInst::verify() const {} +bool SophonLoadStrideInst::verify() const {} +bool SophonLoadInst::verify() const {} +bool SophonStoreStrideInst::verify() const {} +bool SophonStoreInst::verify() const {} -void 
SophonLocalMacInst::verify() const {} -void SophonLocalMaxInst::verify() const {} -void SophonLocalCmpInst::verify() const {} -void SophonLocalMulInst::verify() const {} +bool SophonLocalMacInst::verify() const {} +bool SophonLocalMaxInst::verify() const {} +bool SophonLocalCmpInst::verify() const {} +bool SophonLocalMulInst::verify() const {} -void SophonLocalReshapeInst::verify() const {} +bool SophonLocalReshapeInst::verify() const {} #endif diff --git a/tools/ClassGen/Backends/Sophon/SophonSpecificNodesVerification.h b/tools/ClassGen/Backends/Sophon/SophonSpecificNodesVerification.h index caae4709cc..9da5f22c82 100644 --- a/tools/ClassGen/Backends/Sophon/SophonSpecificNodesVerification.h +++ b/tools/ClassGen/Backends/Sophon/SophonSpecificNodesVerification.h @@ -14,7 +14,7 @@ * limitations under the License. */ -void SophonConvolutionNode::verify() const { +bool SophonConvolutionNode::verify() const { // TODO: just demo ShapeNCHW idim(getInput().getType()->dims()); ShapeNCHW odim(getResult().getType()->dims()); @@ -23,70 +23,71 @@ void SophonConvolutionNode::verify() const { ShapeNCHW exp(idim.n, getBias().dims()[0], outSz.first, outSz.second); (void)exp; assert(exp == odim && "Invalid output dimensions"); + return true; } -void SophonConvolutionWithoutBiasNode::verify() const {} +bool SophonConvolutionWithoutBiasNode::verify() const { return true; } -void SophonMaxPoolNode::verify() const {} -void SophonAvgPoolNode::verify() const {} -void SophonFullyConnectedNode::verify() const {} -void SophonMatMulNode::verify() const {} +bool SophonMaxPoolNode::verify() const { return true; } +bool SophonAvgPoolNode::verify() const { return true; } +bool SophonFullyConnectedNode::verify() const { return true; } +bool SophonMatMulNode::verify() const { return true; } -void SophonNormalizeNode::verify() const {} -void SophonBatchNormalizationNode::verify() const {} -void SophonBatchNormalizationOptNode::verify() const {} -void SophonLocalResponseNormalizationNode::verify() 
const {} +bool SophonNormalizeNode::verify() const { return true; } +bool SophonBatchNormalizationNode::verify() const { return true; } +bool SophonBatchNormalizationOptNode::verify() const { return true; } +bool SophonLocalResponseNormalizationNode::verify() const { return true; } -void SophonReluNode::verify() const {} -void SophonTanhNode::verify() const {} -void SophonSigmoidNode::verify() const {} -void SophonPreluNode::verify() const {} +bool SophonReluNode::verify() const { return true; } +bool SophonTanhNode::verify() const { return true; } +bool SophonSigmoidNode::verify() const { return true; } +bool SophonPreluNode::verify() const { return true; } -void SophonSoftMaxNode::verify() const {} -void SophonPriorboxNode::verify() const { assert(0); } -void SophonUpsampleNode::verify() const {} -void SophonDeconvolutionNode::verify() const {} -void SophonDeconvolutionWithoutBiasNode::verify() const {} -void SophonDeconvolutionOptNode::verify() const {} -void SophonDeconvolutionWithoutBiasOptNode::verify() const {} -void SophonROIPoolNode::verify() const {} -void SophonPSROIPoolNode::verify() const {} -void SophonMultiRegionNode::verify() const {} -void SophonLSTMNode::verify() const {} -void SophonShuffleChannelNode::verify() const {} -void SophonSliceNode::verify() const {} +bool SophonSoftMaxNode::verify() const { return true; } +bool SophonPriorboxNode::verify() const { assert(0); return false; /* unsupported node; reached only when asserts are compiled out */ } +bool SophonUpsampleNode::verify() const { return true; } +bool SophonDeconvolutionNode::verify() const { return true; } +bool SophonDeconvolutionWithoutBiasNode::verify() const { return true; } +bool SophonDeconvolutionOptNode::verify() const { return true; } +bool SophonDeconvolutionWithoutBiasOptNode::verify() const { return true; } +bool SophonROIPoolNode::verify() const { return true; } +bool SophonPSROIPoolNode::verify() const { return true; } +bool SophonMultiRegionNode::verify() const { return true; } +bool SophonLSTMNode::verify() const { return true; } +bool
SophonShuffleChannelNode::verify() const { return true; } +bool SophonSliceNode::verify() const { return true; } -void SophonConcatNode::verify() const {} -void SophonConcat2Node::verify() const {} -void SophonConcat3Node::verify() const {} -void SophonConcat4Node::verify() const {} -void SophonConcat5Node::verify() const {} -void SophonConcat6Node::verify() const {} -void SophonReshapeNode::verify() const {} -void SophonTransposeNode::verify() const {} -void SophonFlattenNode::verify() const {} -void SophonCropNode::verify() const {} -void SophonReorgNode::verify() const {} -void SophonPermuteNode::verify() const {} -void SophonDummyDataNode::verify() const {} -void SophonEltwiseNode::verify() const {} -void SophonTileNode::verify() const {} +bool SophonConcatNode::verify() const { return true; } +bool SophonConcat2Node::verify() const { return true; } +bool SophonConcat3Node::verify() const { return true; } +bool SophonConcat4Node::verify() const { return true; } +bool SophonConcat5Node::verify() const { return true; } +bool SophonConcat6Node::verify() const { return true; } +bool SophonReshapeNode::verify() const { return true; } +bool SophonTransposeNode::verify() const { return true; } +bool SophonFlattenNode::verify() const { return true; } +bool SophonCropNode::verify() const { return true; } +bool SophonReorgNode::verify() const { return true; } +bool SophonPermuteNode::verify() const { return true; } +bool SophonDummyDataNode::verify() const { return true; } +bool SophonEltwiseNode::verify() const { return true; } +bool SophonTileNode::verify() const { return true; } -void SophonScaleNode::verify() const {} -void SophonScaleWithoutBiasNode::verify() const {} -void SophonScale1Node::verify() const {} -void SophonMulNode::verify() const {} -void SophonAddNode::verify() const {} -void SophonMaxNode::verify() const {} -void SophonPowNode::verify() const {} -void SophonAbsNode::verify() const {} -void SophonSubNode::verify() const {} -void 
SophonDivNode::verify() const {} +bool SophonScaleNode::verify() const { return true; } +bool SophonScaleWithoutBiasNode::verify() const { return true; } +bool SophonScale1Node::verify() const { return true; } +bool SophonMulNode::verify() const { return true; } +bool SophonAddNode::verify() const { return true; } +bool SophonMaxNode::verify() const { return true; } +bool SophonPowNode::verify() const { return true; } +bool SophonAbsNode::verify() const { return true; } +bool SophonSubNode::verify() const { return true; } +bool SophonDivNode::verify() const { return true; } -void SophonProposalNode::verify() const { assert(0); } -void SophonRegionNode::verify() const {} -void SophonYoloNode::verify() const {} -void SophonInterpNode::verify() const {} +bool SophonProposalNode::verify() const { assert(0); return false; /* unsupported node; reached only when asserts are compiled out */ } +bool SophonRegionNode::verify() const { return true; } +bool SophonYoloNode::verify() const { return true; } +bool SophonInterpNode::verify() const { return true; } -// void SophonSliceNode::verify() const {} -void SophonReductionNode::verify() const {} +// bool SophonSliceNode::verify() const {return true;} +bool SophonReductionNode::verify() const { return true; } From c2a29c2608a11f40ad98d9c1901a061038e5e0aa Mon Sep 17 00:00:00 2001 From: Eric Yu Date: Thu, 27 Dec 2018 09:01:20 +0000 Subject: [PATCH 6/7] ut --- .../Sophon/Backends/BM1880CodeGenTest.cpp | 14 ++++++++-- .../Sophon/Backends/BM1880Expand.cpp | 26 +++++++++---------- .../Backends/BM1880ExpandCodeGenTest.cpp | 18 ++++++++++--- .../Sophon/Backends/BM1880MemoryAllocTest.cpp | 9 +++++-- .../Sophon/Backends/LoadStoreTest.cpp | 4 +-- tests/unittests/Sophon/CMakeLists.txt | 2 +- 6 files changed, 49 insertions(+), 24 deletions(-) diff --git a/tests/unittests/Sophon/Backends/BM1880CodeGenTest.cpp b/tests/unittests/Sophon/Backends/BM1880CodeGenTest.cpp index 9b5ef22b11..a2932ad4df 100644 --- a/tests/unittests/Sophon/Backends/BM1880CodeGenTest.cpp +++ b/tests/unittests/Sophon/Backends/BM1880CodeGenTest.cpp @@
-326,7 +326,12 @@ TEST(BM1880CodeGenTest, LIRConvCompileRun) { } backend->reorderWeights(IR.get()); - backend->codegen(std::move(IR), &allocInfo)->execute(ctx); + auto function = backend->codegen(std::move(IR), &allocInfo); + function->setupRuns(); + function->beforeRun(ctx); + function->execute(); + function->afterRun(ctx); + function->tearDownRuns(); auto H = outputTensor->getHandle(); EXPECT_EQ(H.at({0, 0, 0, 0}), 1); @@ -510,7 +515,12 @@ TEST(BM1880CodeGenTest, LIRFCRun) { std::unique_ptr backend(new BM1880Backend()); backend->reorderWeights(IR.get()); - backend->codegen(std::move(IR), &allocInfo)->execute(ctx); + auto function = backend->codegen(std::move(IR), &allocInfo); + function->setupRuns(); + function->beforeRun(ctx); + function->execute(); + function->afterRun(ctx); + function->tearDownRuns(); auto H = ctx_fc_y->getHandle(); for (unsigned i = 0; i < N; i++) { diff --git a/tests/unittests/Sophon/Backends/BM1880Expand.cpp b/tests/unittests/Sophon/Backends/BM1880Expand.cpp index 14f77151df..4f6592effc 100644 --- a/tests/unittests/Sophon/Backends/BM1880Expand.cpp +++ b/tests/unittests/Sophon/Backends/BM1880Expand.cpp @@ -73,7 +73,7 @@ TEST(BM1880ExpandTest, ExpandConvQ8) { std::advance(cur_inst, 4); EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIConvolutionQ8InstKind); - auto *MI_inst = llvm::cast(cur_inst); + auto *MI_inst = llvm::cast(&*cur_inst); EXPECT_EQ(MI_inst->getDest()->getName(), lmem_out->getName()); EXPECT_EQ(MI_inst->getSrc()->getName(), lmem_in->getName()); EXPECT_EQ(MI_inst->getFilter()->getName(), lmem_weight->getName()); @@ -133,7 +133,7 @@ TEST(BM1880ExpandTest, ExpandMaxPoolingQ8) { std::advance(cur_inst, 2); EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIMaxPoolingQ8InstKind); - auto *pool_inst = llvm::cast(cur_inst); + auto *pool_inst = llvm::cast(&*cur_inst); EXPECT_EQ(pool_inst->getDest()->getName(), lmem_out->getName()); EXPECT_EQ(pool_inst->getSrc()->getName(), lmem_in->getName()); 
EXPECT_EQ(pool_inst->getKernelHW(), @@ -146,7 +146,7 @@ TEST(BM1880ExpandTest, ExpandMaxPoolingQ8) { std::advance(cur_inst, 1); EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIMulConstQ8InstKind); - auto *mul_inst = llvm::cast(cur_inst); + auto *mul_inst = llvm::cast(&*cur_inst); EXPECT_EQ(mul_inst->getDest()->getName(), lmem_out->getName()); EXPECT_EQ(mul_inst->getSrc()->getName(), lmem_out->getName()); EXPECT_EQ(mul_inst->getMultiplier(), multiplier); @@ -182,7 +182,7 @@ TEST(BM1880ExpandTest, ExpandReluQ8) { auto cur_inst = IR->getInstrs().begin(); std::advance(cur_inst, 2); EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIReluQ8InstKind); - auto *relu_inst = llvm::cast(cur_inst); + auto *relu_inst = llvm::cast(&*cur_inst); EXPECT_EQ(relu_inst->getDest()->getName(), lmem_out->getName()); EXPECT_EQ(relu_inst->getSrc()->getName(), lmem_in->getName()); } @@ -236,7 +236,7 @@ TEST(BM1880ExpandTest, ExpandFcQ8) { auto cur_inst = IR->getInstrs().begin(); std::advance(cur_inst, 4); EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIFCQ8InstKind); - auto *fc_inst = llvm::cast(cur_inst); + auto *fc_inst = llvm::cast(&*cur_inst); EXPECT_EQ(fc_inst->getDest()->getName(), lmem_out->getName()); EXPECT_EQ(fc_inst->getSrc()->getName(), lmem_in->getName()); EXPECT_EQ(fc_inst->getFilter()->getName(), lmem_weights->getName()); @@ -296,7 +296,7 @@ TEST(BM1880ExpandTest, ExpandFcReluQ8) { auto cur_inst = IR->getInstrs().begin(); std::advance(cur_inst, 4); EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIFCQ8InstKind); - auto *fc_inst = llvm::cast(cur_inst); + auto *fc_inst = llvm::cast(&*cur_inst); EXPECT_EQ(fc_inst->getDest()->getName(), lmem_out->getName()); EXPECT_EQ(fc_inst->getSrc()->getName(), lmem_in->getName()); EXPECT_EQ(fc_inst->getFilter()->getName(), lmem_weights->getName()); @@ -335,7 +335,7 @@ TEST(BM1880ExpandTest, ExpandLoadQ8) { std::advance(cur_inst, 2); EXPECT_EQ(cur_inst->getKind(), 
glow::Kinded::Kind::SophonMIGDMAGlobalToLocalInstKind); - auto *load_input = llvm::cast(cur_inst); + auto *load_input = llvm::cast(&*cur_inst); EXPECT_EQ(load_input->getShapeNCHW(), (llvm::ArrayRef{1, 1, 3, 3})); EXPECT_EQ(load_input->getGlobalStrideNCH(), (llvm::ArrayRef{1 * 3 * 3, 3 * 3, 3})); @@ -345,7 +345,7 @@ TEST(BM1880ExpandTest, ExpandLoadQ8) { std::advance(cur_inst, 2); EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIGDMAGlobalToLocalInstKind); - auto *load_weight = llvm::cast(cur_inst); + auto *load_weight = llvm::cast(&*cur_inst); EXPECT_EQ(load_weight->getShapeNCHW(), (llvm::ArrayRef{1, 3, 9, 1})); EXPECT_EQ(load_weight->getGlobalStrideNCH(), @@ -356,7 +356,7 @@ TEST(BM1880ExpandTest, ExpandLoadQ8) { std::advance(cur_inst, 2); EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIGDMAGlobalToLocalInstKind); - auto *load_bias = llvm::cast(cur_inst); + auto *load_bias = llvm::cast(&*cur_inst); EXPECT_EQ(load_bias->getShapeNCHW(), (llvm::ArrayRef{2, 3, 1, 1})); EXPECT_EQ(load_bias->getGlobalStrideNCH(), (llvm::ArrayRef{3 * 1 * 1, 1 * 1, 1})); @@ -366,7 +366,7 @@ TEST(BM1880ExpandTest, ExpandLoadQ8) { std::advance(cur_inst, 2); EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIGDMALocalToGlobalInstKind); - auto *store = llvm::cast(cur_inst); + auto *store = llvm::cast(&*cur_inst); EXPECT_EQ(store->getShapeNCHW(), (llvm::ArrayRef{1, 3, 3, 3})); EXPECT_EQ(store->getGlobalStrideNCH(), (llvm::ArrayRef{3 * 3 * 3, 3 * 3, 3})); @@ -405,7 +405,7 @@ TEST(BM1880ExpandTest, ExpandLoadStoreFcQ8) { std::advance(cur_inst, 2); EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIGDMAGlobalToLocalInstKind); - auto *load_input = llvm::cast(cur_inst); + auto *load_input = llvm::cast(&*cur_inst); EXPECT_EQ(load_input->getShapeNCHW(), (llvm::ArrayRef{1, 25, 1, 32})); // 25=800/32 EXPECT_EQ(load_input->getGlobalStrideNCH(), @@ -417,7 +417,7 @@ TEST(BM1880ExpandTest, ExpandLoadStoreFcQ8) { std::advance(cur_inst, 2); EXPECT_EQ(cur_inst->getKind(), 
glow::Kinded::Kind::SophonMIGDMAGlobalToLocalInstKind); - auto *load_weight = llvm::cast(cur_inst); + auto *load_weight = llvm::cast(&*cur_inst); EXPECT_EQ(load_weight->getShapeNCHW(), (llvm::ArrayRef{800, 16, 1, 32})); // 16=500/32 EXPECT_EQ(load_weight->getGlobalStrideNCH(), @@ -429,7 +429,7 @@ TEST(BM1880ExpandTest, ExpandLoadStoreFcQ8) { std::advance(cur_inst, 2); EXPECT_EQ(cur_inst->getKind(), glow::Kinded::Kind::SophonMIGDMAGlobalToLocalInstKind); - auto *load_bias = llvm::cast(cur_inst); + auto *load_bias = llvm::cast(&*cur_inst); EXPECT_EQ(load_bias->getShapeNCHW(), (llvm::ArrayRef{2, 16, 1, 32})); // 16=500/32 EXPECT_EQ(load_bias->getGlobalStrideNCH(), diff --git a/tests/unittests/Sophon/Backends/BM1880ExpandCodeGenTest.cpp b/tests/unittests/Sophon/Backends/BM1880ExpandCodeGenTest.cpp index 172b428c3d..28f836d934 100644 --- a/tests/unittests/Sophon/Backends/BM1880ExpandCodeGenTest.cpp +++ b/tests/unittests/Sophon/Backends/BM1880ExpandCodeGenTest.cpp @@ -18,10 +18,10 @@ using namespace glow; template -static auto Inst(T &t, size_t idx) -> decltype(t->getInstrs().begin()) { +static auto Inst(T &t, size_t idx) -> decltype(&*t->getInstrs().begin()) { auto cur_inst = t->getInstrs().begin(); std::advance(cur_inst, idx); - return cur_inst; + return &*cur_inst; } TEST(BM1880ExapndCodeGenTest, ConvQ8Run) { @@ -115,7 +115,12 @@ TEST(BM1880ExapndCodeGenTest, ConvQ8Run) { mem_lut[llvm::cast(Inst(IR, 5))] = 2 * 32 + 1024 * 32; // size: 2 * 32 // run - backend->codegen(std::move(IR), &allocInfo)->execute(ctx); + auto function = backend->codegen(std::move(IR), &allocInfo); + function->setupRuns(); + function->beforeRun(ctx); + function->execute(); + function->afterRun(ctx); + function->tearDownRuns(); // check result auto H = outputTensor->getHandle(); @@ -273,7 +278,12 @@ TEST(BM1880ExapndCodeGenTest, LIRFC) { mem_lut[llvm::cast(Inst(IR, 5))] = 2 * 32 + 1024 * 32; // size: 2 * 32 // 6. 
run on cmodel - backend->codegen(std::move(IR), &allocInfo)->execute(ctx); + auto function = backend->codegen(std::move(IR), &allocInfo); + function->setupRuns(); + function->beforeRun(ctx); + function->execute(); + function->afterRun(ctx); + function->tearDownRuns(); auto H = outputTensor->getHandle(); for (unsigned i = 0; i < N; i++) { diff --git a/tests/unittests/Sophon/Backends/BM1880MemoryAllocTest.cpp b/tests/unittests/Sophon/Backends/BM1880MemoryAllocTest.cpp index 8c971f0b8b..320245c6a2 100644 --- a/tests/unittests/Sophon/Backends/BM1880MemoryAllocTest.cpp +++ b/tests/unittests/Sophon/Backends/BM1880MemoryAllocTest.cpp @@ -70,7 +70,7 @@ TEST(BM1880MemAllocTest, ConvMemAllocRun) { // run backend flow std::unique_ptr backend(new BM1880Backend()); - BM1880AllocationsInfo alloc_info(ctx, backend->getTTI()); + BM1880AllocationsInfo alloc_info(backend->getTTI()); backend->runOptimizationPasses(IR.get(), &alloc_info); // check alloc info @@ -113,7 +113,12 @@ TEST(BM1880MemAllocTest, ConvMemAllocRun) { #endif // codegen and run - backend->codegen(std::move(IR), &alloc_info)->execute(ctx); + auto function = backend->codegen(std::move(IR), &alloc_info); + function->setupRuns(); + function->beforeRun(ctx); + function->execute(); + function->afterRun(ctx); + function->tearDownRuns(); // check result auto H = outputTensor->getHandle(); diff --git a/tests/unittests/Sophon/Backends/LoadStoreTest.cpp b/tests/unittests/Sophon/Backends/LoadStoreTest.cpp index f718b21d11..950952ac1a 100644 --- a/tests/unittests/Sophon/Backends/LoadStoreTest.cpp +++ b/tests/unittests/Sophon/Backends/LoadStoreTest.cpp @@ -15,10 +15,10 @@ using namespace glow; using namespace std; template -static auto Inst(T &t, size_t idx) -> decltype(t->getInstrs().begin()) { +static auto Inst(T &t, size_t idx) -> decltype(&*t->getInstrs().begin()) { auto cur_inst = t->getInstrs().begin(); std::advance(cur_inst, idx); - return cur_inst; + return &*cur_inst; } class LoadStoreTest : public ::testing::Test { 
diff --git a/tests/unittests/Sophon/CMakeLists.txt b/tests/unittests/Sophon/CMakeLists.txt index 68c34925dc..8b48c25002 100644 --- a/tests/unittests/Sophon/CMakeLists.txt +++ b/tests/unittests/Sophon/CMakeLists.txt @@ -9,7 +9,7 @@ function(add_glow_sophon_test) ${ARG_LIBS} Sophon gtest - testMain) + TestMain) add_glow_test( NAME sophon_${ARG_NAME} COMMAND ${ARG_NAME} ${ARG_ARGS}) From 9341e2a03d25b0e4d07bde5b51d57310678a63d2 Mon Sep 17 00:00:00 2001 From: zakk Date: Thu, 27 Dec 2018 21:24:21 +0800 Subject: [PATCH 7/7] [Sophon] fix ninja build dependencies --- lib/Backends/Sophon/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Backends/Sophon/CMakeLists.txt b/lib/Backends/Sophon/CMakeLists.txt index 1e5a98b71c..7cd50f6b16 100644 --- a/lib/Backends/Sophon/CMakeLists.txt +++ b/lib/Backends/Sophon/CMakeLists.txt @@ -10,9 +10,9 @@ ExternalProject_Add(sophon_sdk BUILD_COMMAND "" INSTALL_COMMAND "" BUILD_BYPRODUCTS - ${Sophon_ext_lib}/lib/cmodel/libbmkernel-static.a - ${Sophon_ext_lib}/lib/cmodel/libbmodel.so - ${Sophon_ext_lib}/lib/cmodel/libbmruntime.so + ${Sophon_ext_lib}/bmnet_sdk/install/lib/libbmkernel-static.a + ${Sophon_ext_lib}/bmnet_sdk/install/lib/libbmodel.so + ${Sophon_ext_lib}/bmnet_sdk/install/lib/libbmruntime.so GIT_REPOSITORY https://github.com/ffk0716/bm1880-bmnnsdk-usb.git GIT_TAG bm1880-usb_1.0.2.2-hotfix)