//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ #define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_ #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Builders.h" #include "llvm/Support/FormatVariadic.h" namespace mlir { template struct GPUFuncOpLowering : ConvertToLLVMPattern { explicit GPUFuncOpLowering(LLVMTypeConverter &typeConverter) : ConvertToLLVMPattern(gpu::GPUFuncOp::getOperationName(), typeConverter.getDialect()->getContext(), typeConverter) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { assert(operands.empty() && "func op is not expected to have operands"); auto gpuFuncOp = cast(op); Location loc = gpuFuncOp.getLoc(); SmallVector workgroupBuffers; workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions()); for (auto en : llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) { Value attribution = en.value(); auto type = attribution.getType().dyn_cast(); assert(type && type.hasStaticShape() && "unexpected type in attribution"); uint64_t numElements = type.getNumElements(); auto elementType = typeConverter->convertType(type.getElementType()) .template cast(); auto arrayType = LLVM::LLVMType::getArrayTy(elementType, numElements); std::string name = std::string( llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), en.index())); auto globalOp = rewriter.create( gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false, LLVM::Linkage::Internal, name, /*value=*/Attribute(), gpu::GPUDialect::getWorkgroupAddressSpace()); workgroupBuffers.push_back(globalOp); } // Rewrite the original GPU function to an LLVM function. auto funcType = typeConverter->convertType(gpuFuncOp.getType()) .template cast() .getPointerElementTy(); // Remap proper input types. TypeConverter::SignatureConversion signatureConversion( gpuFuncOp.front().getNumArguments()); getTypeConverter()->convertFunctionSignature( gpuFuncOp.getType(), /*isVariadic=*/false, signatureConversion); // Create the new function operation. Only copy those attributes that are // not specific to function modeling. SmallVector attributes; for (const auto &attr : gpuFuncOp.getAttrs()) { if (attr.first == SymbolTable::getSymbolAttrName() || attr.first == impl::getTypeAttrName() || attr.first == gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName()) continue; attributes.push_back(attr); } auto llvmFuncOp = rewriter.create( gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType, LLVM::Linkage::External, attributes); { // Insert operations that correspond to converted workgroup and private // memory attributions to the body of the function. This must operate on // the original function, before the body region is inlined in the new // function to maintain the relation between block arguments and the // parent operation that assigns their semantics. OpBuilder::InsertionGuard guard(rewriter); // Rewrite workgroup memory attributions to addresses of global buffers. rewriter.setInsertionPointToStart(&gpuFuncOp.front()); unsigned numProperArguments = gpuFuncOp.getNumArguments(); auto i32Type = LLVM::LLVMType::getInt32Ty(rewriter.getContext()); Value zero = nullptr; if (!workgroupBuffers.empty()) zero = rewriter.create(loc, i32Type, rewriter.getI32IntegerAttr(0)); for (auto en : llvm::enumerate(workgroupBuffers)) { LLVM::GlobalOp global = en.value(); Value address = rewriter.create(loc, global); auto elementType = global.getType().getArrayElementType(); Value memory = rewriter.create( loc, elementType.getPointerTo(global.addr_space()), address, ArrayRef{zero, zero}); // Build a memref descriptor pointing to the buffer to plug with the // existing memref infrastructure. This may use more registers than // otherwise necessary given that memref sizes are fixed, but we can try // and canonicalize that away later. Value attribution = gpuFuncOp.getWorkgroupAttributions()[en.index()]; auto type = attribution.getType().cast(); auto descr = MemRefDescriptor::fromStaticShape( rewriter, loc, *getTypeConverter(), type, memory); signatureConversion.remapInput(numProperArguments + en.index(), descr); } // Rewrite private memory attributions to alloca'ed buffers. unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions(); auto int64Ty = LLVM::LLVMType::getInt64Ty(rewriter.getContext()); for (auto en : llvm::enumerate(gpuFuncOp.getPrivateAttributions())) { Value attribution = en.value(); auto type = attribution.getType().cast(); assert(type && type.hasStaticShape() && "unexpected type in attribution"); // Explicitly drop memory space when lowering private memory // attributions since NVVM models it as `alloca`s in the default // memory space and does not support `alloca`s with addrspace(5). auto ptrType = typeConverter->convertType(type.getElementType()) .template cast() .getPointerTo(AllocaAddrSpace); Value numElements = rewriter.create( gpuFuncOp.getLoc(), int64Ty, rewriter.getI64IntegerAttr(type.getNumElements())); Value allocated = rewriter.create( gpuFuncOp.getLoc(), ptrType, numElements, /*alignment=*/0); auto descr = MemRefDescriptor::fromStaticShape( rewriter, loc, *getTypeConverter(), type, allocated); signatureConversion.remapInput( numProperArguments + numWorkgroupAttributions + en.index(), descr); } } // Move the region to the new function, update the entry block signature. rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(), llvmFuncOp.end()); if (failed(rewriter.convertRegionTypes( &llvmFuncOp.getBody(), *typeConverter, &signatureConversion))) return failure(); rewriter.eraseOp(gpuFuncOp); return success(); } }; struct GPUReturnOpLowering : public ConvertToLLVMPattern { GPUReturnOpLowering(LLVMTypeConverter &typeConverter) : ConvertToLLVMPattern(gpu::ReturnOp::getOperationName(), typeConverter.getDialect()->getContext(), typeConverter) {} LogicalResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { rewriter.replaceOpWithNewOp(op, operands); return success(); } }; } // namespace mlir #endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_