diff --git a/mlir/include/mlir/Dialect/MIGraphX/IR/MIGraphX.td b/mlir/include/mlir/Dialect/MIGraphX/IR/MIGraphX.td index 79c0b35d0ca4..1d050d408618 100644 --- a/mlir/include/mlir/Dialect/MIGraphX/IR/MIGraphX.td +++ b/mlir/include/mlir/Dialect/MIGraphX/IR/MIGraphX.td @@ -98,7 +98,7 @@ def MIGraphX_ClipOp : // Keep that logic here. def MIGraphX_WhereOp : MIGraphX_Op<"where">, - Arguments<(ins MIXRShapedOf<[I8]>:$cond, + Arguments<(ins MIXRShapedOf<[I8, SI8, UI8]>:$cond, AnyMIXRShaped:$inA, AnyMIXRShaped:$inB)>, Results<(outs AnyMIXRShaped:$output)> { @@ -117,18 +117,14 @@ def MIGraphX_ConvertOp : MIGraphX_Op<"convert">, - Arguments<(ins AnyMIXRShaped:$inA, UnitAttr:$zeroExtend)>, + Arguments<(ins AnyMIXRShaped:$inA)>, Results<(outs AnyMIXRShaped:$output)> { let summary = "Elementwise type conversion"; let description = [{ Type conversion. Due to impedance mismatches between MIGraphX and Tosa, currently only supports float to float conversions. - - If zeroExtend is set, the input is treated as an unsigned integer. - This is MLIR-specific, since MIGraphX encodes integer signedness in types - but MLIR generally uses signless integers. }]; - let assemblyFormat = "(`zero_extend` $zeroExtend^)? $inA attr-dict `:` type($inA) `to` type($output)"; + let assemblyFormat = "$inA attr-dict `:` type($inA) `to` type($output)"; } class MIGraphX_ElementwiseUnaryOp<string mnemonic, list<Trait> traits=[]> : @@ -181,10 +177,9 @@ def MIGraphX_TanhOp : // int4 operations def MIGraphX_UnpackOp : MIGraphX_Op<"unpack">, - Arguments<(ins MIXRShapedOf<[I8, I<4>]>:$in, - I64Attr:$axis, - BoolAttr:$isUnsigned)>, - Results<(outs MIXRShapedOf<[I8, I<4>]>:$out)> { + Arguments<(ins MIXRShapedOf<[I8, UI8, SI8, I<4>, SI<4>, UI<4>]>:$in, + I64Attr:$axis)>, + Results<(outs MIXRShapedOf<[I8, UI8, SI8, I<4>, SI<4>, UI<4>]>:$out)> { let summary = "Unpack int4 values stored as bytes"; let description = [{ Given a shaped tensor of bytes, double the length of `axis` by @@ -201,9 +196,6 @@ def MIGraphX_UnpackOp : MIGraphX_Op<"unpack">, the corresponding tensor of i8 (in which case, the `i4` are exposed as an extra dimension and not flattened) or another tensor of i4. This allows us to progressively move unpack up to function boundaries. - - If `isUnsigned` is true, the inputs are a buffer of unsigned 4-bit ints, - otherwise they are signed. }]; let assemblyFormat = [{ $in attr-dict `:` type($in) `->` type($out) }]; @@ -333,7 +325,7 @@ class MIGraphX_ConvOpBase<string mnemonic, list<Type> inputTypes=[], list<Type> } def MIGraphX_QuantConvolutionOp : - MIGraphX_ConvOpBase<"quant_convolution", [F8E4M3FNUZ, F8E5M2FNUZ, F8E5M2, F8E4M3FN, I8], [F32, I32]> { + MIGraphX_ConvOpBase<"quant_convolution", [F8E4M3FNUZ, F8E5M2FNUZ, F8E5M2, F8E4M3FN, I8, SI8], [F32, I32, SI32]> { let summary = "quantized convolution forward"; let description = [{ The `migraphx.quant_convolution` op computes quantized convolution forward. @@ -510,7 +502,7 @@ class MIGraphX_DotOpBase<string mnemonic, list<Type> inputTypes=[], list<Type> o } def MIGraphX_QuantDotOp : - MIGraphX_DotOpBase<"quant_dot", [F8E4M3FNUZ, F8E5M2FNUZ, F8E4M3FN, F8E5M2, I8], [F32, I32]>{ + MIGraphX_DotOpBase<"quant_dot", [F8E4M3FNUZ, F8E5M2FNUZ, F8E4M3FN, F8E5M2, I8, SI8], [F32, I32, SI32]>{ let summary = "Dot product of quantized tensors"; let description = [{ The `migraphx.quant_dot` op computes the dot product of two tensors.
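Taken together, the .td changes above move signedness out of attributes (zeroExtend on convert, isUnsigned on unpack) and into the !migraphx.shaped element type itself (si8/ui8, si4/ui4, si32, and so on). A minimal sketch of the resulting IR, following the assembly syntax of the tests later in this patch (the function name and shapes here are illustrative, not taken from the change):

// Sketch: unsigned int4 unpack with no isUnsigned attribute; the ui8 element
// type carries the signedness, mirroring the realize-int4 tests below.
func.func @unsigned_unpack_sketch(%x: !migraphx.shaped<8x2xui8, 2x1>) -> !migraphx.shaped<8x4xui8, 4x1> {
  %y = migraphx.unpack %x {axis = 1 : i64} : <8x2xui8, 2x1> -> <8x4xui8, 4x1>
  return %y : !migraphx.shaped<8x4xui8, 4x1>
}
// A convert that previously needed `zero_extend` is now implied by ui4 -> ui8:
// %c = migraphx.convert %u : <16xui4, 1> to <16xui8, 1>

The unsigned element types survive until MIGraphXToTosa, where the type converter erases them to signless integers and routes unsigned-sensitive ops through the rocmlir "unsigned_cast"/"unsigned_div" tosa.custom ops, as the conversion code below shows.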
diff --git a/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td b/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td index 82579d9db0b9..a8944d4f65e8 100644 --- a/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td +++ b/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td @@ -472,11 +472,6 @@ def Rock_EnableSplitKForTuning : Rock_Attr<"EnableSplitKForTuning"> { let mnemonic = "enable_splitk_for_tuning"; } -// Used when converting 1-dimensional migraphx.convolution to tosa.conv2d -def Rock_ExpandedFrom1D : Rock_Attr<"ExpandedFrom1D"> { - let mnemonic = "expanded_from_1d"; -} - def Rock_PrefillAttr : Rock_Attr<"Prefill"> { let mnemonic = "rock.prefill"; diff --git a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp index f1b256998be7..7cd63080c529 100644 --- a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp +++ b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp @@ -41,8 +41,27 @@ using mlir::migraphx::MIXRShapedType; //===----------------------------------------------------------------------===// migraphx::MIXRShapedToTensorConverter::MIXRShapedToTensorConverter() { - addConversion([](Type type) { return type; }); - addConversion([](MIXRShapedType shaped) { return shaped.asTensor(); }); + addConversion([](Type type) { + if (type.isInteger() && !type.isSignlessInteger()) { + type = IntegerType::get(type.getContext(), type.getIntOrFloatBitWidth(), + IntegerType::SignednessSemantics::Signless); + } + return type; + }); + addConversion([](MIXRShapedType shaped) { + RankedTensorType newType = shaped.asTensor(); + Type elementType = newType.getElementType(); + + // Convert to signless if the element type is a signed integer + if (elementType.isInteger() && !elementType.isSignlessInteger()) { + elementType = IntegerType::get( + shaped.getContext(), elementType.getIntOrFloatBitWidth(), + IntegerType::SignednessSemantics::Signless); + // Create a new tensor type with the signless element type + newType = RankedTensorType::get(newType.getShape(), elementType); + } + return newType; + }); addSourceMaterialization([](OpBuilder &b, MIXRShapedType shapedResType, ValueRange tensorResult, @@ -92,13 +111,28 @@ static TosaOp createOpAndInfer(PatternRewriter &rewriter, Location loc, return op; } -static tosa::CastOp createCastOp(PatternRewriter &rewriter, Location loc, - Type resElementType, Value input) { - ShapedType inputType = cast(input.getType()); - Type resType = inputType.cloneWith({}, resElementType); - - auto op = rewriter.create(loc, resType, input); - return op; +static Value createCastOp(PatternRewriter &rewriter, Location loc, + Type resElementType, Value input, Type inputType, + Type resElementTypeBeforeConvert = nullptr) { + ShapedType shapedInputType = cast(input.getType()); + Type resType = shapedInputType.cloneWith({}, resElementType); + + if (!resElementTypeBeforeConvert) + resElementTypeBeforeConvert = resElementType; + + Value res; + if (inputType.isUnsignedInteger() || + resElementTypeBeforeConvert.isUnsignedInteger()) { + assert(!inputType.isSignedInteger() && + !resElementTypeBeforeConvert.isSignedInteger()); + res = rewriter + .create(loc, resType, "unsigned_cast", "rocmlir", + "", input) + .getResult(0); + } else { + res = rewriter.create(loc, resType, input).getResult(); + } + return res; } static Type getShapedElementTy(Value v) { @@ -181,6 +215,7 @@ namespace { template struct ConvConverter final : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; + using 
OpConversionPattern::getTypeConverter; using OpAdaptor = typename OpConversionPattern::OpAdaptor; // Note, this lowering pattern works for both migraphx.convolution and @@ -215,9 +250,14 @@ LogicalResult ConvConverter::matchAndRewrite( ValueRange results = op->getResults(); Type elementTy = inputType.getElementType(); auto outputTy = cast(results[0].getType()); + Type outElementTy = outputTy.getElementType(); + Type newOutElementTy = getTypeConverter()->convertType(outElementTy); SmallVector NCHW2NHWC{0, 2, 3, 1}; SmallVector NHWC2NCHW{0, 3, 1, 2}; + if (outElementTy.isUnsignedInteger()) + return op.emitError("No support for unsigned convolution.\n"); + int dims = outputTy.getShape().size() - 2; SmallVector toChannelLast{0}; SmallVector fromChannelLast{0, dims + 1}; @@ -237,11 +277,10 @@ LogicalResult ConvConverter::matchAndRewrite( for (int i = 0; i < dims; i++) newShape.push_back(outShape[i + 2]); newShape.push_back(outShape[1]); - Type newOutTy = RankedTensorType::get(newShape, outputTy.getElementType()); + Type newOutTy = RankedTensorType::get(newShape, newOutElementTy); // There is no tosa.conv1d, so instead we'll add a dummy x1 dimension - // to the input tensors, and make a tosa.conv2d. We'll also add the - // ExpandedFrom1D attribute so we can undo it in tosa-to-rock. + // to the input tensors, and make a tosa.conv2d. auto expandTo2D = [&rewriter, loc](mlir::Value value) { ArrayRef origShape = cast(value.getType()).getShape(); SmallVector expShape(origShape.drop_back()); @@ -259,7 +298,7 @@ LogicalResult ConvConverter::matchAndRewrite( case 1: // Expand to do a conv2d, because there's no conv1d op. newShape.insert(std::prev(newShape.end()), 1); - new1DOutTy = RankedTensorType::get(newShape, outputTy.getElementType()); + new1DOutTy = RankedTensorType::get(newShape, newOutElementTy); input = expandTo2D(input); filter = expandTo2D(filter); @@ -267,11 +306,9 @@ LogicalResult ConvConverter::matchAndRewrite( loc, new1DOutTy, ValueRange{ input, filter, - getZeroTensor(loc, outputTy.getElementType(), + getZeroTensor(loc, newOutElementTy, cast(filter.getType()).getShape()[0], rewriter)}); - cop->setAttr(rock::ExpandedFrom1DAttr::getMnemonic(), - rewriter.getUnitAttr()); break; case 2: @@ -279,7 +316,7 @@ LogicalResult ConvConverter::matchAndRewrite( loc, newOutTy, ValueRange{ input, filter, - getZeroTensor(loc, outputTy.getElementType(), + getZeroTensor(loc, newOutElementTy, cast(filter.getType()).getShape()[0], rewriter)}); break; @@ -288,7 +325,7 @@ LogicalResult ConvConverter::matchAndRewrite( loc, newOutTy, ValueRange{ input, filter, - getZeroTensor(loc, outputTy.getElementType(), + getZeroTensor(loc, newOutElementTy, cast(filter.getType()).getShape()[0], rewriter)}); break; @@ -326,7 +363,7 @@ LogicalResult ConvConverter::matchAndRewrite( return op->emitError( "1-D convolution has improper dilation, stride, or pad."); } - dilations.push_back(0); + dilations.push_back(1); strides.push_back(1); pads.push_back(0); pads.push_back(0); @@ -377,12 +414,17 @@ LogicalResult DotConverter::matchAndRewrite( auto results = op->getResults(); Type elementTy = inA.getType().getElementType(); auto origOutputTy = cast(results[0].getType()); + Type outElementTy = origOutputTy.getElementType(); + Type newOutElementTy = getTypeConverter()->convertType(outElementTy); + + if (outElementTy.isUnsignedInteger()) + return op.emitError("No support for unsigned dot product.\n"); // check batch dimension. 
Tosa matmul only allow a single dimension for it, // add reshape ops to flatten and restore the original dimension. ArrayRef origOutDims = origOutputTy.getShape(); RankedTensorType newOutType = - RankedTensorType::get(origOutDims, origOutputTy.getElementType()); + RankedTensorType::get(origOutDims, newOutElementTy); size_t outRank = origOutDims.size(); ArrayRef orgDimsA = inA.getType().getShape(); ArrayRef orgDimsB = inB.getType().getShape(); @@ -426,8 +468,7 @@ LogicalResult DotConverter::matchAndRewrite( } RankedTensorType newAType = RankedTensorType::get(newDimsA, elementTy); RankedTensorType newBType = RankedTensorType::get(newDimsB, elementTy); - newOutType = - RankedTensorType::get(newDimsOut, origOutputTy.getElementType()); + newOutType = RankedTensorType::get(newDimsOut, newOutElementTy); auto reshapeAOp = rewriter.create( loc, newAType, inA, rewriter.getDenseI64ArrayAttr(newDimsA)); auto reshapeBOp = rewriter.create( @@ -527,6 +568,7 @@ BroadcastConverter::matchAndRewrite(migraphx::BroadcastOp op, OpAdaptor adaptor, ArrayRef outShape = op.getOutput().getType().getShape(); uint32_t outRank = op.getOutput().getType().getRank(); Type elemType = op.getOutput().getType().getElementType(); + Type newOutElementTy = getTypeConverter()->convertType(elemType); auto axis = static_cast(cast(op->getAttr("axis")).getInt()); @@ -539,15 +581,15 @@ BroadcastConverter::matchAndRewrite(migraphx::BroadcastOp op, OpAdaptor adaptor, } } tosa::ReshapeOp sameRankReshapedOp = createOpAndInfer( - rewriter, loc, elemType, adaptor.getInput(), + rewriter, loc, newOutElementTy, adaptor.getInput(), rewriter.getDenseI64ArrayAttr(newShape)); - auto outType = RankedTensorType::get(outShape, elemType); + auto outType = RankedTensorType::get(outShape, newOutElementTy); // We create a dummy zero addition with implicit broadcasting // because tosa does not have an explicit broadcast op auto zeroTensor = getZeroTensor(loc, outType, rewriter); auto addWithZero = createOpAndInfer( - rewriter, loc, elemType, zeroTensor, sameRankReshapedOp); + rewriter, loc, newOutElementTy, zeroTensor, sameRankReshapedOp); rewriter.replaceOp(op, addWithZero); return success(); @@ -792,8 +834,21 @@ DivConverter::matchAndRewrite(migraphx::DivOp op, OpAdaptor adaptor, auto inBTensor = cast>(adaptor.getInB()); Type elementType = inATensor.getType().getElementType(); if (isa(elementType)) { - Value div = createOpAndInfer(rewriter, loc, elementType, - inATensor, inBTensor); + auto origAElementType = op.getInA().getType().getElementType(); + auto origBElementType = op.getInB().getType().getElementType(); + Value div; + if (origAElementType.isUnsignedInteger() || + origBElementType.isUnsignedInteger()) { + if (origAElementType != origBElementType) + return op->emitError("Types of A and B must be the same"); + mlir::SmallVector inputs = {inATensor, inBTensor}; + auto op = rewriter.create( + loc, inATensor.getType(), "unsigned_div", "rocmlir", "", inputs); + div = op->getResult(0); + } else { + div = createOpAndInfer(rewriter, loc, elementType, + inATensor, inBTensor); + } rewriter.replaceOp(op, div); return success(); } @@ -884,12 +939,17 @@ LogicalResult DeQuantizeLinearConverter::matchAndRewrite( Value output = op.getOutput(); Location loc = op->getLoc(); - Type outputType = getShapedElementTy(output); - Value upcastInput = createCastOp(rewriter, loc, outputType, input); + Type origOutputType = getShapedElementTy(output); + Type outputType = getTypeConverter()->convertType(origOutputType); + Value upcastInput = + createCastOp(rewriter, loc, 
outputType, input, + op.getInput().getType().getElementType(), origOutputType); Value shifted = upcastInput; if (auto bias = adaptor.getBias()) { - Value upcastBias = createCastOp(rewriter, loc, outputType, bias); + Value upcastBias = + createCastOp(rewriter, loc, outputType, bias, + op.getBias().getType().getElementType(), origOutputType); shifted = createOpAndInfer<tosa::SubOp>(rewriter, loc, outputType, upcastInput, upcastBias); } @@ -920,7 +980,8 @@ LogicalResult QuantizeLinearConverter::matchAndRewrite( Value scaled = createOpAndInfer<tosa::MulOp>( rewriter, loc, elementType, input, inverseScale, /*shift=*/0); - Type outputType = getShapedElementTy(output); + Type origOutputType = getShapedElementTy(output); + Type outputType = getTypeConverter()->convertType(origOutputType); // If there is a bias, we upcast to the larger of the bias type and int32_t // or float (which is what the bias type is in dequantize, the MLIR // quantization implementation, and other ML frameworks) and then do a @@ -929,17 +990,20 @@ Type biasType = outputType; if (bias) { biasType = getShapedElementTy(bias); - if (biasType.getIntOrFloatBitWidth() < 32) { - biasType = isa<IntegerType>(biasType) ? cast<Type>(rewriter.getI32Type()) - : cast<Type>(rewriter.getF32Type()); - bias = createCastOp(rewriter, loc, biasType, bias); - } } - Value asShort = createCastOp(rewriter, loc, biasType, scaled); + if ((bias || origOutputType != outputType) && + biasType.getIntOrFloatBitWidth() < 32) { + biasType = isa<IntegerType>(biasType) ? cast<Type>(rewriter.getI32Type()) + : cast<Type>(rewriter.getF32Type()); + } + Value asShort = createCastOp(rewriter, loc, biasType, scaled, elementType); Value biased = asShort; - if (bias) + if (bias) { + bias = createCastOp(rewriter, loc, biasType, bias, + op.getBias().getType().getElementType()); biased = createOpAndInfer<tosa::AddOp>(rewriter, loc, biasType, asShort, bias); + } Value result = biased; if (biasType != outputType) { @@ -961,20 +1025,28 @@ LogicalResult QuantizeLinearConverter::matchAndRewrite( minI = APInt(64, (int64_t)(minF.convertToFloat())); maxI = APInt(64, (int64_t)(maxF.convertToFloat())); } else { - minI = APInt::getSignedMinValue(width); - maxI = APInt::getSignedMaxValue(width); - minF.convertFromAPInt(minI, /*IsSigned=*/true, + minI = origOutputType.isUnsignedInteger() + ? APInt::getMinValue(width) + : APInt::getSignedMinValue(width); + maxI = origOutputType.isUnsignedInteger() + ? APInt::getMaxValue(width) + : APInt::getSignedMaxValue(width); + minF.convertFromAPInt(minI, /*IsSigned=*/origOutputType.isSignedInteger(), APFloat::rmNearestTiesToEven); - maxF.convertFromAPInt(maxI, /*IsSigned=*/true, + maxF.convertFromAPInt(maxI, /*IsSigned=*/origOutputType.isSignedInteger(), APFloat::rmNearestTiesToEven); } FloatAttr minFatt = rewriter.getFloatAttr(rewriter.getF32Type(), minF); FloatAttr maxFatt = rewriter.getFloatAttr(rewriter.getF32Type(), maxF); - result = createOpAndInfer( - rewriter, loc, biasType, result, minI.getSExtValue(), - maxI.getSExtValue(), minFatt, maxFatt); - result = createCastOp(rewriter, loc, outputType, result); + auto minVal = origOutputType.isUnsignedInteger() ? minI.getZExtValue() + : minI.getSExtValue(); + auto maxVal = origOutputType.isUnsignedInteger() ?
maxI.getZExtValue() + : maxI.getSExtValue(); + result = createOpAndInfer(rewriter, loc, biasType, result, + minVal, maxVal, minFatt, maxFatt); + result = createCastOp(rewriter, loc, outputType, result, biasType, + origOutputType); } rewriter.replaceOp(op, result); @@ -984,21 +1056,29 @@ LogicalResult QuantizeLinearConverter::matchAndRewrite( LogicalResult ConvertConverter::matchAndRewrite(migraphx::ConvertOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { - if (!op.getZeroExtend()) { + + auto inputType = op.getInA().getType().getElementType(); + auto outputType = op.getResult().getType().getElementType(); + if (inputType.isUnsignedInteger() || outputType.isUnsignedInteger()) { + assert(!inputType.isSignedInteger() && !outputType.isSignedInteger()); + rewriter.replaceOpWithNewOp( + op, getTypeConverter()->convertType(op.getResult().getType()), + "unsigned_cast", "rocmlir", "", adaptor.getInA()); + } else { rewriter.replaceOpWithNewOp( op, getTypeConverter()->convertType(op.getResult().getType()), adaptor.getInA()); - return success(); } - rewriter.replaceOpWithNewOp( - op, getTypeConverter()->convertType(op.getResult().getType()), - "unsigned_cast", "rocmlir", "", adaptor.getInA()); return success(); } LogicalResult NegConverter::matchAndRewrite(migraphx::NegOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { + auto outElementType = op.getResult().getType().getElementType(); + if (outElementType.isUnsignedInteger()) + return op.emitOpError("can't negate an unsigned int type"); + rewriter.replaceOpWithNewOp( op, getTypeConverter()->convertType(op.getResult().getType()), adaptor.getInA(), nullptr); @@ -1077,11 +1157,39 @@ LogicalResult LiteralConverter::matchAndRewrite(migraphx::LiteralOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { MIXRShapedType type = op.getResult().getType(); - RankedTensorType newType = type.asTensor(); + RankedTensorType newType = + cast(getTypeConverter()->convertType(type)); + ElementsAttr value = op.getValue(); - if (value.isSplat() && value.getType() != newType) - value = SplatElementsAttr::get(newType, value.getSplatValue()); - rewriter.replaceOpWithNewOp(op, type.asTensor(), value); + if (value.getType() != newType) { + if (value.isSplat()) { + // Get the original splat value (for example SI8 value) + Attribute splatValue = value.getSplatValue(); + + // Reinterpret the splatValue under the new type (for example SI8 -> I8), + // preserving bytes + Attribute newSplatValue; + if (auto intAttr = dyn_cast(splatValue)) + newSplatValue = + IntegerAttr::get(newType.getElementType(), intAttr.getValue()); + else if (auto floatAttr = dyn_cast(splatValue)) + newSplatValue = + FloatAttr::get(newType.getElementType(), floatAttr.getValue()); + else + return failure(); + + // Create the new SplatElementsAttr (for example I8 type) with preserved + // value bytes + value = SplatElementsAttr::get(newType, newSplatValue); + } else { + // Reinterpret existing values under the new type + auto originalAttr = cast(value); + value = DenseElementsAttr::get(newType, originalAttr.getRawData()); + } + } + + // Replace with the new operation using the updated tensor type + rewriter.replaceOpWithNewOp(op, newType, value); return success(); } @@ -1107,7 +1215,8 @@ WhereConverter::matchAndRewrite(migraphx::WhereOp op, OpAdaptor adaptor, Value rawCond = adaptor.getCond(); Value inA = adaptor.getInA(); Value inB = adaptor.getInB(); - Value cond = createCastOp(rewriter, loc, rewriter.getI1Type(), rawCond); + Value cond = 
createCastOp(rewriter, loc, rewriter.getI1Type(), rawCond, + op.getCond().getType().getElementType()); rewriter.replaceOpWithNewOp( op, getTypeConverter()->convertType(op.getResult().getType()), cond, inA, inB); @@ -1275,27 +1384,26 @@ LogicalResult MHALLaunchConverter::matchAndRewrite( void migraphx::populateMIGraphXToTosaConversionPatterns( RewritePatternSet &patterns, TypeConverter &typeConverter) { - patterns - .add, ConvConverter, - DotConverter, DotConverter, BroadcastConverter, - MultiBroadcastConverter, TransposeConverter, ReshapeConverter, - SliceConverter, ReduceMeanConverter, ReduceSumConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, DivConverter, MulConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, DeQuantizeLinearConverter, - QuantizeLinearConverter, DeQuantizeLinearConverter, ConvertConverter, - NegConverter, ReluConverter, SoftmaxConverter, LiteralConverter, - ClipConverter, WhereConverter>(typeConverter, patterns.getContext()); + patterns.add, ConvConverter, + DotConverter, DotConverter, + BroadcastConverter, MultiBroadcastConverter, TransposeConverter, + ReshapeConverter, SliceConverter, ReduceMeanConverter, + ReduceSumConverter, TrivialConverter, + TrivialConverter, + TrivialConverter, DivConverter, MulConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, QuantizeLinearConverter, + DeQuantizeLinearConverter, ConvertConverter, NegConverter, + ReluConverter, SoftmaxConverter, LiteralConverter, ClipConverter, + WhereConverter>(typeConverter, patterns.getContext()); } void mlir::migraphx::populateMIGraphXFuncBoundaryToTosaConversionPatterns( diff --git a/mlir/lib/Conversion/RocmlirCustomTosaToLinalg/RocmlirCustomTosaToLinalg.cpp b/mlir/lib/Conversion/RocmlirCustomTosaToLinalg/RocmlirCustomTosaToLinalg.cpp index 6ed91afe28d9..112c08cc6946 100644 --- a/mlir/lib/Conversion/RocmlirCustomTosaToLinalg/RocmlirCustomTosaToLinalg.cpp +++ b/mlir/lib/Conversion/RocmlirCustomTosaToLinalg/RocmlirCustomTosaToLinalg.cpp @@ -49,35 +49,61 @@ LogicalResult UnsignedCastLoweringPattern::matchAndRewrite( ConversionPatternRewriter &rewriter) const { if (op.getDomainName() != "rocmlir") return rewriter.notifyMatchFailure(op, "domain isn't rocmlir"); - if (op.getOperatorName() != "unsigned_cast") - return rewriter.notifyMatchFailure(op, "isn't an unsigned_cast"); + if (op.getOperatorName() != "unsigned_cast" && + op.getOperatorName() != "unsigned_div") + return rewriter.notifyMatchFailure( + op, "isn't an unsigned_cast or unsigned_div"); + Location loc = op.getLoc(); auto outType = cast(op.getResults().front().getType()); + Type inElemType = + cast(op.getInputs().front().getType()).getElementType(); Type outElemType = outType.getElementType(); Value emptyTensor = rewriter.create( loc, outType, /*dynamic_sizes=*/ValueRange{}); SmallVector iterationMaps( - 2, rewriter.getMultiDimIdentityMap(outType.getRank())); + op.getInputs().size() + 1, + rewriter.getMultiDimIdentityMap(outType.getRank())); SmallVector iteratorKinds(outType.getRank(), utils::IteratorType::parallel); - auto cast = rewriter.create( + auto genericOp = rewriter.create( loc, outType, adaptor.getInputs(), emptyTensor, iterationMaps, iteratorKinds, [&](OpBuilder &b, Location loc, ValueRange inputs) { Value 
result; - if (isa(outElemType)) - result = b.create(loc, outElemType, inputs[0]); - else - result = b.create(loc, outElemType, inputs[0]); + if (op.getOperatorName() == "unsigned_cast") { + assert(inputs.size() == 2); + if (isa(inElemType)) { + if (isa(outElemType)) { + result = b.create(loc, outElemType, inputs[0]); + } else if (outElemType.getIntOrFloatBitWidth() > + inElemType.getIntOrFloatBitWidth()) { + result = b.create(loc, outElemType, inputs[0]); + } else { + result = b.create(loc, outElemType, inputs[0]); + } + } else { + assert(isa(inElemType)); + assert(isa(outElemType)); + result = b.create(loc, outElemType, inputs[0]); + } + } else if (op.getOperatorName() == "unsigned_div") { + assert(isa(outElemType)); + assert(isa(inElemType)); + assert(inputs.size() == 3); + result = + b.create(loc, outElemType, inputs[0], inputs[1]); + } b.create(loc, result); }); - rewriter.replaceOp(op, cast); + rewriter.replaceOp(op, genericOp); return success(); } void mlir::rock::populateRocmlirCustomTosaToLinalgTarget( ConversionTarget &target) { target.addLegalOp(); target.addDynamicallyLegalOp( [](tosa::CustomOp op) { return op.getDomainName() != "rocmlir"; }); diff --git a/mlir/lib/Dialect/MIGraphX/IR/MIGraphX.cpp b/mlir/lib/Dialect/MIGraphX/IR/MIGraphX.cpp index 6cf3b337f255..3e241923994c 100644 --- a/mlir/lib/Dialect/MIGraphX/IR/MIGraphX.cpp +++ b/mlir/lib/Dialect/MIGraphX/IR/MIGraphX.cpp @@ -246,7 +246,13 @@ RankedTensorType MIXRShapedType::asMemoryLayoutTensor() const { orderedShape[prevIdx] = stride / prevStride; } } - return RankedTensorType::get(orderedShape, getElementType()); + Type elementType = getElementType(); + if (elementType.isInteger() && !elementType.isSignlessInteger()) { + elementType = + IntegerType::get(getContext(), elementType.getIntOrFloatBitWidth(), + IntegerType::SignednessSemantics::Signless); + } + return RankedTensorType::get(orderedShape, elementType); } RankedTensorType MIXRShapedType::asFlatMemoryTensor() const { diff --git a/mlir/lib/Dialect/MIGraphX/Transforms/RealizeInt4.cpp b/mlir/lib/Dialect/MIGraphX/Transforms/RealizeInt4.cpp index 08d8885c8eca..13c299e49e0e 100644 --- a/mlir/lib/Dialect/MIGraphX/Transforms/RealizeInt4.cpp +++ b/mlir/lib/Dialect/MIGraphX/Transforms/RealizeInt4.cpp @@ -77,8 +77,15 @@ static MIXRShapedType asInt4Tensor(const MIXRShapedType byteType, llvm::enumerate(MutableArrayRef(strides))) if (static_cast(index) != axis) stride *= 2; - return MIXRShapedType::get(sizes, strides, - IntegerType::get(byteType.getContext(), 4)); + + auto signedness = IntegerType::SignednessSemantics::Signless; + if (byteType.getElementType().isUnsignedInteger()) + signedness = IntegerType::SignednessSemantics::Unsigned; + else if (byteType.getElementType().isSignedInteger()) + signedness = IntegerType::SignednessSemantics::Signed; + + return MIXRShapedType::get( + sizes, strides, IntegerType::get(byteType.getContext(), 4, signedness)); } LogicalResult RewriteByteUnpackPattern::matchAndRewrite( @@ -91,12 +98,9 @@ LogicalResult RewriteByteUnpackPattern::matchAndRewrite( int64_t axis = op.getAxis(); MIXRShapedType packedByteType = op.getIn().getType(); MIXRShapedType actualType = asInt4Tensor(packedByteType, axis); - Value reinterpreted = rewriter.create( - loc, actualType, adaptor.getIn(), axis, adaptor.getIsUnsigned()); - rewriter.replaceOpWithNewOp(op, outType, reinterpreted, - /*zeroExtend=*/adaptor.getIsUnsigned() - ? 
rewriter.getUnitAttr() - : nullptr); + Value reinterpreted = + rewriter.create(loc, actualType, adaptor.getIn(), axis); + rewriter.replaceOpWithNewOp(op, outType, reinterpreted); return success(); } @@ -113,8 +117,7 @@ LogicalResult TransposeUnpackInterchange::matchAndRewrite( MIXRShapedType preTrReinterpretedType = asInt4Tensor(trOp.getInput().getType(), preTransposeAxis); Value reinterpreted = rewriter.create( - op.getLoc(), preTrReinterpretedType, trOp.getInput(), preTransposeAxis, - adaptor.getIsUnsigned()); + op.getLoc(), preTrReinterpretedType, trOp.getInput(), preTransposeAxis); // Not a replaceOpWithNewOp() because we're keeping a different op's location. Value transposed = rewriter.create( trOp.getLoc(), op.getOut().getType(), reinterpreted, permutation); @@ -144,9 +147,8 @@ LogicalResult ReshapeUnpackInterchange::matchAndRewrite( lastUnitDim = idx; MIXRShapedType oldShapeInt4 = asInt4Tensor(oldShapeBytes, lastUnitDim); MIXRShapedType newShapeInt4 = op.getOut().getType(); - Value reinterpreted = - rewriter.create(op.getLoc(), oldShapeInt4, reshapeOp.getInput(), - lastUnitDim, adaptor.getIsUnsigned()); + Value reinterpreted = rewriter.create( + op.getLoc(), oldShapeInt4, reshapeOp.getInput(), lastUnitDim); Value reshaped = rewriter.create( reshapeOp.getLoc(), newShapeInt4, reinterpreted, rewriter.getI64ArrayAttr(newShapeInt4.getShape())); @@ -176,8 +178,7 @@ LogicalResult MultiBroadcastUnpackInterchange::matchAndRewrite( } } Value reinterpreted = rewriter.create( - op.getLoc(), preBroadcastInt4, broadcastOp.getInput(), adaptor.getAxis(), - adaptor.getIsUnsigned()); + op.getLoc(), preBroadcastInt4, broadcastOp.getInput(), adaptor.getAxis()); Value broadcasted = rewriter.create( broadcastOp.getLoc(), op.getOut().getType(), reinterpreted, rewriter.getArrayAttr(newOutLens)); @@ -195,7 +196,7 @@ LogicalResult FuncArgUnpackElimination::matchAndRewrite( dyn_cast(unpackArg.getParentRegion()->getParentOp()); if (!funcOp) return op.emitOpError("A tensor that'll be unpacked is an argument to " - "somethng other than a function"); + "something other than a function"); MIXRShapedType int4Type = op.getResult().getType(); FunctionType funcType = funcOp.getFunctionType(); SmallVector newInTypes(funcType.getInputs()); diff --git a/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa-signed-unsigned-ints.mlir b/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa-signed-unsigned-ints.mlir new file mode 100644 index 000000000000..27ee6a9da00d --- /dev/null +++ b/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa-signed-unsigned-ints.mlir @@ -0,0 +1,222 @@ +// RUN: rocmlir-opt -split-input-file --migraphx-to-tosa %s | FileCheck %s + +// CHECK-LABEL: @migraphx_literal_dense_ui8() +// CHECK-SAME: -> tensor<4xi8> { +func.func @migraphx_literal_dense_ui8() -> !migraphx.shaped<4xui8, 1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<[23, 28, 19, 20]> : tensor<4xi8>}> : () -> tensor<4xi8> + // CHECK-NEXT: return %[[const]] : tensor<4xi8> + %0 = migraphx.literal (dense<[23, 28, 19, 20]> : tensor<4xui8>) : <4xui8, 1> + return %0 : !migraphx.shaped<4xui8, 1> +} + +// CHECK-LABEL: @migraphx_literal_dense_si8() +// CHECK-SAME: -> tensor<4xi8> { +func.func @migraphx_literal_dense_si8() -> !migraphx.shaped<4xsi8, 1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<[-23, 28, -19, 20]> : tensor<4xi8>}> : () -> tensor<4xi8> + // CHECK-NEXT: return %[[const]] : tensor<4xi8> + %0 = migraphx.literal (dense<[-23, 28, -19, 20]> : tensor<4xsi8>) : <4xsi8, 1> + return %0 : !migraphx.shaped<4xsi8, 
1> +} + +// CHECK-LABEL: @migraphx_literal_dense_i8() +// CHECK-SAME: -> tensor<4xi8> { +func.func @migraphx_literal_dense_i8() -> !migraphx.shaped<4xi8, 1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<[-23, 28, -19, 20]> : tensor<4xi8>}> : () -> tensor<4xi8> + // CHECK-NEXT: return %[[const]] : tensor<4xi8> + %0 = migraphx.literal (dense<[-23, 28, -19, 20]> : tensor<4xi8>) : <4xi8, 1> + return %0 : !migraphx.shaped<4xi8, 1> +} + +// CHECK-LABEL: @migraphx_literal_dense_f16() +// CHECK-SAME: -> tensor<4xf16> { +func.func @migraphx_literal_dense_f16() -> !migraphx.shaped<4xf16, 1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<[-2.300000e+01, 2.800000e+01, -1.900000e+01, 2.000000e+01]> : tensor<4xf16>}> : () -> tensor<4xf16> + // CHECK-NEXT: return %[[const]] : tensor<4xf16> + %0 = migraphx.literal (dense<[-23.0, 28.0, -19.0, 20.0]> : tensor<4xf16>) : <4xf16, 1> + return %0 : !migraphx.shaped<4xf16, 1> +} + +// CHECK-LABEL: @migraphx_literal_dense_f32() +// CHECK-SAME: -> tensor<4xf32> { +func.func @migraphx_literal_dense_f32() -> !migraphx.shaped<4xf32, 1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<[-2.300000e+01, 2.800000e+01, -1.900000e+01, 2.000000e+01]> : tensor<4xf32>}> : () -> tensor<4xf32> + // CHECK-NEXT: return %[[const]] : tensor<4xf32> + %0 = migraphx.literal (dense<[-23.0, 28.0, -19.0, 20.0]> : tensor<4xf32>) : <4xf32, 1> + return %0 : !migraphx.shaped<4xf32, 1> +} + +// CHECK-LABEL: @migraphx_literal_zero() +// CHECK-SAME: -> tensor<9408xi8> { +func.func @migraphx_literal_zero() -> !migraphx.shaped<64x3x7x7xsi8, 147x49x7x1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<0> : tensor<64x3x7x7xi8>}> : () -> tensor<64x3x7x7xi8> + // CHECK-NEXT: %[[reshape:.+]] = tosa.reshape %[[const]] {new_shape = array} : (tensor<64x3x7x7xi8>) -> tensor<9408xi8> + // CHECK-NEXT: return %[[reshape]] : tensor<9408xi8> + %0 = migraphx.literal (dense<0> : tensor<64x1xsi8>) : <64x3x7x7xsi8, 147x49x7x1> + return %0 : !migraphx.shaped<64x3x7x7xsi8, 147x49x7x1> +} + +// CHECK-LABEL: @migraphx_literal_negative() +// CHECK-SAME: -> tensor<9408xi8> { +func.func @migraphx_literal_negative() -> !migraphx.shaped<64x3x7x7xsi8, 147x49x7x1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<-1> : tensor<64x3x7x7xi8>}> : () -> tensor<64x3x7x7xi8> + // CHECK-NEXT: %[[reshape:.+]] = tosa.reshape %[[const]] {new_shape = array} : (tensor<64x3x7x7xi8>) -> tensor<9408xi8> + // CHECK-NEXT: return %[[reshape]] : tensor<9408xi8> + %0 = migraphx.literal (dense<-1> : tensor<64x1xsi8>) : <64x3x7x7xsi8, 147x49x7x1> + return %0 : !migraphx.shaped<64x3x7x7xsi8, 147x49x7x1> +} + +// CHECK-LABEL: @migraphx_convert_int4_signed +// CHECK: tosa.cast +// CHECK-SAME: (tensor<16xi4>) -> tensor<16xi8> +func.func @migraphx_convert_int4_signed(%arg0: !migraphx.shaped<16xsi4, 1>) -> !migraphx.shaped<16xsi8, 1> { + %0 = migraphx.convert %arg0 : <16xsi4, 1> to <16xsi8, 1> + return %0 : !migraphx.shaped<16xsi8, 1> +} + +// CHECK-LABEL: @migraphx_convert_int4_unsigned +// CHECK: tosa.custom +// CHECK-SAME: {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<16xi4>) -> tensor<16xi8> +func.func @migraphx_convert_int4_unsigned(%arg0: !migraphx.shaped<16xui4, 1>) -> !migraphx.shaped<16xui8, 1> { + %0 = migraphx.convert %arg0 : <16xui4, 1> to <16xui8, 1> + return %0 : !migraphx.shaped<16xui8, 1> +} + +// CHECK-LABEL: @migraphx_convert_int4_unsigned_reverse +// CHECK: tosa.custom +// CHECK-SAME: {domain_name = "rocmlir", 
implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<16xi8>) -> tensor<16xi4> +func.func @migraphx_convert_int4_unsigned_reverse(%arg0: !migraphx.shaped<16xui8, 1>) -> !migraphx.shaped<16xui4, 1> { + %0 = migraphx.convert %arg0 : <16xui8, 1> to <16xui4, 1> + return %0 : !migraphx.shaped<16xui4, 1> +} + +// CHECK-LABEL: @migraphx_convert_int4_unsigned_to_float +// CHECK: tosa.custom +// CHECK-SAME: {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<16xi4>) -> tensor<16xf32> +func.func @migraphx_convert_int4_unsigned_to_float(%arg0: !migraphx.shaped<16xui4, 1>) -> !migraphx.shaped<16xf32, 1> { + %0 = migraphx.convert %arg0 : <16xui4, 1> to <16xf32, 1> + return %0 : !migraphx.shaped<16xf32, 1> +} + +// CHECK-LABEL: @migraphx_convert_int4_float_to_unsigned +// CHECK: tosa.custom +// CHECK-SAME: {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<16xf32>) -> tensor<16xi4> +func.func @migraphx_convert_int4_float_to_unsigned(%arg0: !migraphx.shaped<16xf32, 1>) -> !migraphx.shaped<16xui4, 1> { + %0 = migraphx.convert %arg0 : <16xf32, 1> to <16xui4, 1> + return %0 : !migraphx.shaped<16xui4, 1> +} + +// CHECK-LABEL: @migraphx_div_si32 +// CHECK: tosa.int_div +// CHECK-SAME: (tensor<1x36x384x64xi32>, tensor<1x36x384x64xi32>) -> tensor<1x36x384x64xi32> +func.func @migraphx_div_si32(%arg0: !migraphx.shaped<1x36x384x64xsi32, 884736x24576x64x1>, %arg1: !migraphx.shaped<1x36x384x64xsi32, 884736x24576x64x1>) -> !migraphx.shaped<1x36x384x64xsi32, 884736x24576x64x1> attributes{kernel, arch = ""} { + %0 = migraphx.div %arg0, %arg1 : <1x36x384x64xsi32, 884736x24576x64x1>, <1x36x384x64xsi32, 884736x24576x64x1> -> <1x36x384x64xsi32, 884736x24576x64x1> + return %0 : !migraphx.shaped<1x36x384x64xsi32, 884736x24576x64x1> +} + +// CHECK-LABEL: @migraphx_div_ui32 +// CHECK: tosa.custom +// CHECK-SAME: {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_div"} : (tensor<1x36x384x64xi32>, tensor<1x36x384x64xi32>) -> tensor<1x36x384x64xi32> +func.func @migraphx_div_ui32(%arg0: !migraphx.shaped<1x36x384x64xui32, 884736x24576x64x1>, %arg1: !migraphx.shaped<1x36x384x64xui32, 884736x24576x64x1>) -> !migraphx.shaped<1x36x384x64xui32, 884736x24576x64x1> attributes{kernel, arch = ""} { + %0 = migraphx.div %arg0, %arg1 : <1x36x384x64xui32, 884736x24576x64x1>, <1x36x384x64xui32, 884736x24576x64x1> -> <1x36x384x64xui32, 884736x24576x64x1> + return %0 : !migraphx.shaped<1x36x384x64xui32, 884736x24576x64x1> +} + +// CHECK-LABEL: func @dequantize_scale_bias_ui32 +// CHECK: tosa.custom %{{.*}} {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<1x112x112x64xi32>) -> tensor<1x112x112x64xf32> +// CHECK: tosa.custom %{{.*}} {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<64xi32>) -> tensor<64xf32> +// CHECK: tosa.sub +// CHECK: tosa.mul +func.func @dequantize_scale_bias_ui32(%arg: !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1>, %scale: !migraphx.shaped<64xf32, 1>, %bias: !migraphx.shaped<64xui32, 1>) -> !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.dequantizelinear %arg, %scale, %bias : <1x112x112x64xui32, 802816x7168x64x1>, <64xf32, 1>, !migraphx.shaped<64xui32, 1> -> <1x112x112x64xf32, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1> +} + +// CHECK-LABEL: func @dequantize_scale_bias_si32 +// CHECK: tosa.cast{{.*}}f32 
+// CHECK: tosa.cast{{.*}}f32 +// CHECK: tosa.sub +// CHECK: tosa.mul +func.func @dequantize_scale_bias_si32(%arg: !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1>, %scale: !migraphx.shaped<64xf32, 1>, %bias: !migraphx.shaped<64xsi32, 1>) -> !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.dequantizelinear %arg, %scale, %bias : <1x112x112x64xsi32, 802816x7168x64x1>, <64xf32, 1>, !migraphx.shaped<64xsi32, 1> -> <1x112x112x64xf32, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1> +} + +// CHECK-LABEL: func @quantize_scale_bias_ui32 +// CHECK: tosa.reciprocal +// CHECK: tosa.mul +// CHECK: tosa.cast{{.*}}: (tensor<1x112x112x64xf32>) -> tensor<1x112x112x64xi32> +// CHECK: tosa.add +func.func @quantize_scale_bias_ui32(%arg: !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1>, %scale: !migraphx.shaped<64xf32, 1>, %bias: !migraphx.shaped<64xui32, 1>) -> !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.quantizelinear %arg, %scale, %bias : <1x112x112x64xf32, 802816x7168x64x1>, <64xf32, 1>, !migraphx.shaped<64xui32, 1> -> <1x112x112x64xui32, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1> +} + +// CHECK-LABEL: func @quantize_scale_bias_si32 +// CHECK: tosa.reciprocal +// CHECK: tosa.mul +// CHECK: tosa.cast{{.*}}f32{{.*}}i32 +// CHECK: tosa.add +func.func @quantize_scale_bias_si32(%arg: !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1>, %scale: !migraphx.shaped<64xf32, 1>, %bias: !migraphx.shaped<64xsi32, 1>) -> !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.quantizelinear %arg, %scale, %bias : <1x112x112x64xf32, 802816x7168x64x1>, <64xf32, 1>, !migraphx.shaped<64xsi32, 1> -> <1x112x112x64xsi32, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1> +} + +// CHECK-LABEL: func @quantize_scale_bias_ui8 +// CHECK: tosa.reciprocal +// CHECK: tosa.mul +// CHECK: tosa.cast{{.*}}: (tensor<1x112x112x64xf32>) -> tensor<1x112x112x64xi32> +// CHECK: tosa.custom{{.*}}i8{{.*}}i32 +// CHECK: tosa.add +// CHECK: tosa.clamp{{.*}}i32{{.*}}i32 +// CHECK: tosa.custom{{.*}}i32{{.*}}i8 +func.func @quantize_scale_bias_ui8(%arg: !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1>, %scale: !migraphx.shaped<64xf32, 1>, %bias: !migraphx.shaped<64xui8, 1>) -> !migraphx.shaped<1x112x112x64xui8, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.quantizelinear %arg, %scale, %bias : <1x112x112x64xf32, 802816x7168x64x1>, <64xf32, 1>, !migraphx.shaped<64xui8, 1> -> <1x112x112x64xui8, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xui8, 802816x7168x64x1> +} + +// CHECK-LABEL: func @quantize_scale_bias_si8 +// CHECK: tosa.reciprocal +// CHECK: tosa.mul +// CHECK: tosa.cast{{.*}}f32{{.*}}i32 +// CHECK: tosa.cast{{.*}}i8{{.*}}i32 +// CHECK: tosa.add +// CHECK: tosa.clamp{{.*}}i32{{.*}}i32 +// CHECK: tosa.cast{{.*}}i32{{.*}}i8 +func.func @quantize_scale_bias_si8(%arg: !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1>, %scale: !migraphx.shaped<64xf32, 1>, %bias: !migraphx.shaped<64xsi8, 1>) -> !migraphx.shaped<1x112x112x64xsi8, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.quantizelinear %arg, %scale, %bias : <1x112x112x64xf32, 802816x7168x64x1>, <64xf32, 1>, !migraphx.shaped<64xsi8, 1> -> <1x112x112x64xsi8, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xsi8, 802816x7168x64x1> +} + +// CHECK-LABEL: func 
@basic_add_ui32 +// CHECK: tosa.add{{.*}}(tensor<1x112x112x64xi32>, tensor<1x112x112x64xi32>) -> tensor<1x112x112x64xi32> +func.func @basic_add_ui32(%arg0: !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1>, %arg1: !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1>) -> !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.add %arg0, %arg1 : <1x112x112x64xui32, 802816x7168x64x1>, <1x112x112x64xui32, 802816x7168x64x1> -> <1x112x112x64xui32, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1> +} + +// CHECK-LABEL: func @basic_add_si32 +// CHECK: tosa.add{{.*}}(tensor<1x112x112x64xi32>, tensor<1x112x112x64xi32>) -> tensor<1x112x112x64xi32> +func.func @basic_add_si32(%arg0: !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1>, %arg1: !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1>) -> !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.add %arg0, %arg1 : <1x112x112x64xsi32, 802816x7168x64x1>, <1x112x112x64xsi32, 802816x7168x64x1> -> <1x112x112x64xsi32, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1> +} + +// CHECK-LABEL: func @conv_with_quant_si8 +// CHECK: tosa.conv2d{{.*}}quantization_info{{.*}}(tensor<1x224x224x3xi8>, tensor<64x7x7x3xi8>, tensor<64xi32>) -> tensor<1x112x112x64xi32> +// CHECK: tosa.cast{{.*}}(tensor<1x64x112x112xi32>) -> tensor<1x64x112x112xf32> +// CHECK: tosa.cast{{.*}}(tensor<1x64x1x1xi32>) -> tensor<1x64x1x1xf32> +// CHECK: tosa.sub{{.*}}(tensor<1x64x112x112xf32>, tensor<1x64x1x1xf32>) -> tensor<1x64x112x112xf32> +// CHECK: tosa.mul{{.*}}(tensor<1x64x112x112xf32>, tensor<1x64x1x1xf32>) -> tensor<1x64x112x112xf32> +// CHECK: tosa.reciprocal{{.*}}(tensor<1x64x1x1xf32>) -> tensor<1x64x1x1xf32> +// CHECK: tosa.mul{{.*}}(tensor<1x64x112x112xf32>, tensor<1x64x1x1xf32>) -> tensor<1x64x112x112xf32> +// CHECK: tosa.cast{{.*}}(tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xi32> +// CHECK: tosa.cast{{.*}}(tensor<1x64x1x1xi8>) -> tensor<1x64x1x1xi32> +// CHECK: tosa.add{{.*}}(tensor<1x64x112x112xi32>, tensor<1x64x1x1xi32>) -> tensor<1x64x112x112xi32> +// CHECK: tosa.clamp{{.*}}(tensor<1x64x112x112xi32>) -> tensor<1x64x112x112xi32> +// CHECK: tosa.cast{{.*}}(tensor<1x64x112x112xi32>) -> tensor<1x64x112x112xi8> +func.func @conv_with_quant_si8(%arg1: !migraphx.shaped<1x3x224x224xsi8, 150528x50176x224x1>, %arg2: !migraphx.shaped<64x3x7x7xsi8, 147x49x7x1>, %scale: !migraphx.shaped<1x64x1x1xf32, 64x1x1x1>, %bias: !migraphx.shaped<1x64x1x1xsi32, 64x1x1x1>, %bias2: !migraphx.shaped<1x64x1x1xsi8, 64x1x1x1>) -> !migraphx.shaped<1x64x112x112xsi8, 802816x12544x112x1> attributes {kernel = "mixr"} { + %1 = migraphx.quant_convolution %arg1, %arg2 {dilation = [1, 1], group = 1 : i64, padding = [3, 3, 3, 3], padding_mode = 0 : i64, stride = [2, 2]} : <1x3x224x224xsi8, 150528x50176x224x1>, <64x3x7x7xsi8, 147x49x7x1> -> <1x64x112x112xsi32, 802816x12544x112x1> + %2 = migraphx.dequantizelinear %1, %scale, %bias : <1x64x112x112xsi32, 802816x12544x112x1>, <1x64x1x1xf32, 64x1x1x1>, !migraphx.shaped<1x64x1x1xsi32, 64x1x1x1> -> <1x64x112x112xf32, 802816x12544x112x1> + %3 = migraphx.quantizelinear %2, %scale, %bias2 : <1x64x112x112xf32, 802816x12544x112x1>, <1x64x1x1xf32, 64x1x1x1>, !migraphx.shaped<1x64x1x1xsi8, 64x1x1x1> -> <1x64x112x112xsi8, 802816x12544x112x1> + return %3 : !migraphx.shaped<1x64x112x112xsi8, 802816x12544x112x1> +} diff --git a/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa.mlir 
b/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa.mlir index f9526ecbd500..e671fd336c52 100644 --- a/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa.mlir +++ b/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa.mlir @@ -167,7 +167,7 @@ func.func @scalar0d(%arg0: !migraphx.shaped) -> !migraphx.shaped { // CHECK-LABEL: @conv3d_add // CHECK-SAME: (%{{.*}}: tensor<4xf32>, %{{.*}}: tensor<750xf32>, %{{.*}}: tensor<96xf32>) -> tensor<64xf32> -func.func @conv3d_add(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 0x1x0x0x0>, %arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> { +func.func @conv3d_add(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 0x1x0x0x0>, %arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> { // CHECK-COUNT-3: tosa.transpose // CHECK: tosa.conv3d // CHECK-SAME: (tensor<2x5x5x5x3xf32>, tensor<4x2x2x2x3xf32>, tensor<4xf32>) -> tensor<2x2x2x2x4xf32> @@ -176,3 +176,16 @@ func.func @conv3d_add(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 0x1x0x0x0>, %arg1: %1 = migraphx.add %0, %arg0 : <2x4x2x2x2xf32, 32x8x4x2x1>, <2x4x2x2x2xf32, 0x1x0x0x0> -> <2x4x2x2x2xf32, 32x8x4x2x1> return %1 : !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> } + +// CHECK-LABEL: @conv1d_add +// CHECK-SAME: (%{{.*}}: tensor<64xf32>, %{{.*}}: tensor<672xf32>, %{{.*}}: tensor<1344xf32>) -> tensor<14336xf32> +func.func @conv1d_add(%arg0: !migraphx.shaped<1x64x224xf32, 0x1x0>, %arg1: !migraphx.shaped<1x3x224xf32, 672x224x1>, %arg2: !migraphx.shaped<64x3x7xf32, 21x7x1>) -> !migraphx.shaped<1x64x224xf32, 14336x224x1> { + // CHECK-COUNT-3: tosa.transpose + // CHECK: tosa.conv2d + // CHECK-SAME: {dilation = array<i64: 1, 1>, group = 1 : i64, pad = array<i64: 3, 3, 0, 0>, stride = array<i64: 1, 1>} : (tensor<1x224x1x3xf32>, tensor<64x7x1x3xf32>, tensor<64xf32>) -> tensor<1x224x1x64xf32> + // CHECK: tosa.transpose + // CHECK: tosa.add + %0 = migraphx.convolution %arg1, %arg2 {dilation = [1], group = 1 : i64, padding = [3, 3], padding_mode = 0 : i64, stride = [1]} : <1x3x224xf32, 672x224x1>, <64x3x7xf32, 21x7x1> -> <1x64x224xf32, 14336x224x1> + %1 = migraphx.add %0, %arg0 : <1x64x224xf32, 14336x224x1>, <1x64x224xf32, 0x1x0> -> <1x64x224xf32, 14336x224x1> + return %1 : !migraphx.shaped<1x64x224xf32, 14336x224x1> +} diff --git a/mlir/test/Conversion/MIGraphXToTosa/mixr-to-tosa-ops.mlir b/mlir/test/Conversion/MIGraphXToTosa/mixr-to-tosa-ops.mlir index 28eebb812525..62cd1d8e5e74 100644 --- a/mlir/test/Conversion/MIGraphXToTosa/mixr-to-tosa-ops.mlir +++ b/mlir/test/Conversion/MIGraphXToTosa/mixr-to-tosa-ops.mlir @@ -1,6 +1,15 @@ // RUN: rocmlir-opt -split-input-file --migraphx-transform --canonicalize --migraphx-to-tosa %s -verify-diagnostics -o -| FileCheck %s module { + // CHECK-LABEL: func @literal_zero + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<64x3x7x7xf16>}> : () -> tensor<64x3x7x7xf16> + // CHECK-NEXT: %[[reshape:.+]] = tosa.reshape %[[const]] {new_shape = array<i64: 9408>} : (tensor<64x3x7x7xf16>) -> tensor<9408xf16> + // CHECK-NEXT: return %[[reshape]] : tensor<9408xf16> + func.func @literal_zero() -> !migraphx.shaped<64x3x7x7xf16, 147x49x7x1> { + %0 = migraphx.literal (dense<0.0> : tensor<64x1xf16>) : <64x3x7x7xf16, 147x49x7x1> + return %0 : !migraphx.shaped<64x3x7x7xf16, 147x49x7x1> + } + // CHECK-LABEL: func @dequantize_scale // CHECK-NOT: tosa.sub // CHECK: tosa.cast @@ -108,8 +117,8 @@ module { // CHECK-LABEL: func @quantize_scale_bias
// CHECK: tosa.reciprocal // CHECK: tosa.mul - // CHECK: tosa.cast{{.*}}i8{{.*}}i32 // CHECK: tosa.cast{{.*}}f32{{.*}}i32 + // CHECK: tosa.cast{{.*}}i8{{.*}}i32 // CHECK: tosa.add // CHECK: tosa.clamp // CHECK-SAME: max_int = 127 @@ -123,8 +132,8 @@ module { // CHECK-LABEL: func @quantize_scale_bias_fp8 // CHECK: tosa.reciprocal // CHECK: tosa.mul - // CHECK: tosa.cast{{.*}}f8E4M3FNUZ{{.*}}f32 // CHECK: tosa.cast{{.*}}f32{{.*}}f32 + // CHECK: tosa.cast{{.*}}f8E4M3FNUZ{{.*}}f32 // CHECK: tosa.add // CHECK: tosa.clamp // CHECK-SAME: max_fp = 2.400000e+02 @@ -138,8 +147,8 @@ module { // CHECK-LABEL: func @quantize_scale_bias_fp8_ocp // CHECK: tosa.reciprocal // CHECK: tosa.mul - // CHECK: tosa.cast{{.*}}f8E4M3FN{{.*}}f32 // CHECK: tosa.cast{{.*}}f32{{.*}}f32 + // CHECK: tosa.cast{{.*}}f8E4M3FN{{.*}}f32 // CHECK: tosa.add // CHECK: tosa.clamp // CHECK-SAME: max_fp = 4.480000e+02 @@ -153,8 +162,8 @@ module { // CHECK-LABEL: func @quantize_scale_bias_f16 // CHECK: tosa.reciprocal // CHECK: tosa.mul - // CHECK: tosa.cast{{.*}}i8{{.*}}i32 // CHECK: tosa.cast{{.*}}f16{{.*}}i32 + // CHECK: tosa.cast{{.*}}i8{{.*}}i32 // CHECK: tosa.add // CHECK: tosa.clamp // CHECK: tosa.cast @@ -238,6 +247,24 @@ module { return %0 : !migraphx.shaped<32x64xf32, 64x1> } + // CHECK-LABEL: func.func @matmul_broadcast_op + func.func @matmul_broadcast_op(%arg0: !migraphx.shaped<64x64x2304xf16, 147456x2304x1>, %arg1: !migraphx.shaped<64x64x768xf16, 49152x768x1>, %arg2: !migraphx.shaped<1x768x2304xf16, 1769472x2304x1>) -> !migraphx.shaped<64x64x2304xf16, 147456x2304x1> attributes {arch = "gfx90a:sramecc+:xnack-", kernel = "mixr"} { + // CHECK-DAG: %[[ARG2:.*]] = tosa.reshape %arg2 {new_shape = array} + // CHECK-DAG: %[[ARG1:.*]] = tosa.reshape %arg1 {new_shape = array} + // CHECK-DAG: %[[ARG0:.*]] = tosa.reshape %arg0 {new_shape = array} + // CHECK-DAG: %[[INPUT:.*]] = tosa.reshape %[[ARG2]] {new_shape = array} + %0 = migraphx.broadcast %arg2 {axis = 0, out_lens = [64, 768, 2304]} : <1x768x2304xf16, 1769472x2304x1> -> <64x768x2304xf16, 0x2304x1> + // CHECK-DAG: %[[CST0:.*]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<64x768x2304xf16>}> : () -> tensor<64x768x2304xf16> + // CHECK-DAG: %[[ADD:.*]] = tosa.add %[[CST0]], %[[INPUT]] + %1 = migraphx.dot %arg1, %0 : <64x64x768xf16, 49152x768x1>, <64x768x2304xf16, 0x2304x1> -> <64x64x2304xf16, 147456x2304x1> + // CHECK-DAG: %[[MATMUL:.*]] = tosa.matmul %[[ARG1]], %[[ADD]] + // CHECK-DAG: %[[BIASED:.*]] = tosa.add %[[MATMUL]], %[[ARG0]] + // CHECK-DAG: %[[RET:.*]] = tosa.reshape %[[BIASED]] {new_shape = array} + // CHECK: return %[[RET]] + %2 = migraphx.add %1, %arg0 : <64x64x2304xf16, 147456x2304x1>, <64x64x2304xf16, 147456x2304x1> -> <64x64x2304xf16, 147456x2304x1> + return %2 : !migraphx.shaped<64x64x2304xf16, 147456x2304x1> + } + // CHECK-LABEL: func.func @matmul_broadcast func.func @matmul_broadcast(%arg0: !migraphx.shaped<64x64x2304xf16, 147456x2304x1>, %arg1: !migraphx.shaped<64x64x768xf16, 49152x768x1>, %arg2: !migraphx.shaped<1x768x2304xf16, 1769472x2304x1>) -> !migraphx.shaped<64x64x2304xf16, 147456x2304x1> attributes {arch = "gfx90a:sramecc+:xnack-", kernel = "mixr"} { // CHECK-DAG: %[[ARG2:.*]] = tosa.reshape %arg2 {new_shape = array} @@ -541,14 +568,6 @@ module { return %0 : !migraphx.shaped<16xf32, 1> } - // CHECK-LABEL: func.func @func_convert - // CHECK: tosa.custom - // CHECK-SAME: {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<16xi4>) -> tensor<16xi8> - func.func @func_convert_int4_unsigned(%arg0: 
!migraphx.shaped<16xi4, 1>) -> !migraphx.shaped<16xi8, 1> { - %0 = migraphx.convert zero_extend %arg0 : <16xi4, 1> to <16xi8, 1> - return %0 : !migraphx.shaped<16xi8, 1> - } - // CHECK-LABEL: func.func @func_div_f32 // CHECK: tosa.reciprocal // CHECK: tosa.mul diff --git a/mlir/test/Conversion/RocmlirCustomTosaToLinalg/rocmlir-custom-tosa-to-linalg.mlir b/mlir/test/Conversion/RocmlirCustomTosaToLinalg/rocmlir-custom-tosa-to-linalg.mlir index a63aa7e6624f..55dd7239a624 100644 --- a/mlir/test/Conversion/RocmlirCustomTosaToLinalg/rocmlir-custom-tosa-to-linalg.mlir +++ b/mlir/test/Conversion/RocmlirCustomTosaToLinalg/rocmlir-custom-tosa-to-linalg.mlir @@ -1,6 +1,6 @@ // RUN: rocmlir-opt --rocmlir-custom-tosa-to-linalg --split-input-file %s | FileCheck %s -// CHECK-LABEL: @integers +// CHECK-LABEL: @integers_i4_to_i8 // CHECK-SAME: (%[[arg0:.+]]: tensor<8x8x2xi4>) // CHECK: %[[empty:.+]] = tensor.empty() : tensor<8x8x2xi8> // CHECK: %[[ret:.+]] = linalg.generic @@ -11,17 +11,91 @@ // CHECK-NEXT: linalg.yield %[[res]] // CHECK-NEXT: -> tensor<8x8x2xi8> // CHECK-NEXT: return %[[ret]] -func.func @integers(%arg0: tensor<8x8x2xi4>) -> tensor<8x8x2xi8> { +func.func @integers_i4_to_i8(%arg0: tensor<8x8x2xi4>) -> tensor<8x8x2xi8> { %out = tosa.custom %arg0 {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<8x8x2xi4>) -> tensor<8x8x2xi8> func.return %out : tensor<8x8x2xi8> } +// CHECK-LABEL: @integers_i8_to_i4 +// CHECK-SAME: (%[[arg0:.+]]: tensor<8x8x2xi8>) +// CHECK: %[[empty:.+]] = tensor.empty() : tensor<8x8x2xi4> +// CHECK: %[[ret:.+]] = linalg.generic +// CHECK-SAME: ins(%[[arg0]] : tensor<8x8x2xi8>) +// CHECK-SAME: outs(%[[empty]] : tensor<8x8x2xi4>) +// CHECK-NEXT: %[[in:.+]]: i8 +// CHECK-NEXT: %[[res:.+]] = arith.trunci %[[in]] : i8 to i4 +// CHECK-NEXT: linalg.yield %[[res]] +// CHECK-NEXT: -> tensor<8x8x2xi4> +// CHECK-NEXT: return %[[ret]] +func.func @integers_i8_to_i4(%arg0: tensor<8x8x2xi8>) -> tensor<8x8x2xi4> { + %out = tosa.custom %arg0 {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<8x8x2xi8>) -> tensor<8x8x2xi4> + func.return %out : tensor<8x8x2xi4> +} + // ----- -// CHECK-LABEL: @floats -// CHECK: linalg.generic -// CHECK: arith.uitofp -func.func @floats(%arg0: tensor<8x8x2xi4>) -> tensor<8x8x2xf16> { +// CHECK-LABEL: @floats_i4_to_f16 +// CHECK-SAME: (%[[arg0:.+]]: tensor<8x8x2xi4>) +// CHECK: %[[empty:.+]] = tensor.empty() : tensor<8x8x2xf16> +// CHECK: %[[ret:.+]] = linalg.generic +// CHECK-SAME: ins(%[[arg0]] : tensor<8x8x2xi4>) +// CHECK-SAME: outs(%[[empty]] : tensor<8x8x2xf16>) +// CHECK-NEXT: %[[in:.+]]: i4 +// CHECK-NEXT: %[[res:.+]] = arith.uitofp %[[in]] : i4 to f16 +// CHECK-NEXT: linalg.yield %[[res]] +// CHECK-NEXT: -> tensor<8x8x2xf16> +// CHECK-NEXT: return %[[ret]] +func.func @floats_i4_to_f16(%arg0: tensor<8x8x2xi4>) -> tensor<8x8x2xf16> { %out = tosa.custom %arg0 {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<8x8x2xi4>) -> tensor<8x8x2xf16> func.return %out : tensor<8x8x2xf16> } + +// CHECK-LABEL: @floats_i4_to_f32 +// CHECK-SAME: (%[[arg0:.+]]: tensor<8x8x2xi4>) +// CHECK: %[[empty:.+]] = tensor.empty() : tensor<8x8x2xf32> +// CHECK: %[[ret:.+]] = linalg.generic +// CHECK-SAME: ins(%[[arg0]] : tensor<8x8x2xi4>) +// CHECK-SAME: outs(%[[empty]] : tensor<8x8x2xf32>) +// CHECK-NEXT: %[[in:.+]]: i4 +// CHECK-NEXT: %[[res:.+]] = arith.uitofp %[[in]] : i4 to f32 +// CHECK-NEXT: linalg.yield %[[res]] +// CHECK-NEXT: -> 
tensor<8x8x2xf32> +// CHECK-NEXT: return %[[ret]] +func.func @floats_i4_to_f32(%arg0: tensor<8x8x2xi4>) -> tensor<8x8x2xf32> { + %out = tosa.custom %arg0 {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<8x8x2xi4>) -> tensor<8x8x2xf32> + func.return %out : tensor<8x8x2xf32> +} + +// CHECK-LABEL: @floats_f16_to_i8 +// CHECK-SAME: (%[[arg0:.+]]: tensor<8x8x2xf16>) +// CHECK: %[[empty:.+]] = tensor.empty() : tensor<8x8x2xi8> +// CHECK: %[[ret:.+]] = linalg.generic +// CHECK-SAME: ins(%[[arg0]] : tensor<8x8x2xf16>) +// CHECK-SAME: outs(%[[empty]] : tensor<8x8x2xi8>) +// CHECK-NEXT: %[[in:.+]]: f16 +// CHECK-NEXT: %[[res:.+]] = arith.fptoui %[[in]] : f16 to i8 +// CHECK-NEXT: linalg.yield %[[res]] +// CHECK-NEXT: -> tensor<8x8x2xi8> +// CHECK-NEXT: return %[[ret]] +func.func @floats_f16_to_i8(%arg0: tensor<8x8x2xf16>) -> tensor<8x8x2xi8> { + %out = tosa.custom %arg0 {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<8x8x2xf16>) -> tensor<8x8x2xi8> + func.return %out : tensor<8x8x2xi8> +} + +// ----- + +// CHECK-LABEL: @unsigned_div +// CHECK-SAME: (%[[arg0:.+]]: tensor<1x36x384x64xi32>, %[[arg1:.+]]: tensor<1x36x384x64xi32>) +// CHECK: %[[empty:.+]] = tensor.empty() : tensor<1x36x384x64xi32> +// CHECK: %[[ret:.+]] = linalg.generic +// CHECK-SAME: ins(%[[arg0]], %[[arg1]] : tensor<1x36x384x64xi32>, tensor<1x36x384x64xi32>) +// CHECK-SAME: outs(%[[empty]] : tensor<1x36x384x64xi32>) +// CHECK-NEXT: %[[in:.+]]: i32, %[[in1:.+]]: i32, %[[out:.+]]: i32 +// CHECK-NEXT: %[[res:.+]] = arith.divui %[[in]], %[[in1]] : i32 +// CHECK-NEXT: linalg.yield %[[res]] +// CHECK-NEXT: -> tensor<1x36x384x64xi32> +// CHECK-NEXT: return %[[ret]] +func.func @unsigned_div(%arg0: tensor<1x36x384x64xi32>, %arg1: tensor<1x36x384x64xi32>) -> tensor<1x36x384x64xi32> { + %out = tosa.custom %arg0, %arg1 {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_div"} : (tensor<1x36x384x64xi32>, tensor<1x36x384x64xi32>) -> tensor<1x36x384x64xi32> + func.return %out : tensor<1x36x384x64xi32> +} diff --git a/mlir/test/Conversion/TosaToRock/tosa-to-rock.mlir b/mlir/test/Conversion/TosaToRock/tosa-to-rock.mlir index 1f0e37ec623b..7829cd43ad91 100644 --- a/mlir/test/Conversion/TosaToRock/tosa-to-rock.mlir +++ b/mlir/test/Conversion/TosaToRock/tosa-to-rock.mlir @@ -26,6 +26,33 @@ func.func private @mlir_conv3d(%arg0: tensor<4x1x1x1x1xf32>, %arg1: tensor<2x5x5 return %8 : tensor<2x2x2x2x4xf32> } +// CHECK-LABEL: mlir_conv1d +// CHECK: %[[convRes:.*]] = rock.conv(%{{.*}}, %{{.*}}, %{{.*}}) features = none {arch = "", dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "y", "x", "c"], input_layout = ["ni", "hi", "wi", "gi", "ci"], output_layout = ["no", "ho", "wo", "go", "ko"], padding = [3 : index, 3 : index, 0 : index, 0 : index], strides = [1 : index, 1 : index]} : tensor<1x64x7x1x3xf32>, tensor<1x224x1x1x3xf32>, tensor<1x224x1x1x64xf32> -> tensor<1x224x1x1x64xf32> +// CHECK-NEXT: %[[castRes:.*]] = rock.tensor_untransform_cast %[[convRes]] aka %{{.*}} : tensor<1x224x1x1x64xf32> to tensor<1x224x1x64xf32> +// CHECK-NEXT: %[[reshapeRes:.*]] = tosa.reshape %[[castRes]] {new_shape = array} : (tensor<1x224x1x64xf32>) -> tensor<1x224x64xf32> + +func.func private @mlir_conv1d(%arg0: tensor<64xf32>, %arg1: tensor<672xf32>, %arg2: tensor<1344xf32>) -> tensor<14336xf32> attributes {kernel, arch = ""} { + %0 = tosa.reshape %arg0 {new_shape = array} : (tensor<64xf32>) -> tensor<64x1x1xf32> + %1 = "tosa.const"() <{value = 
dense<[2, 0, 1]> : tensor<3xi32>}> : () -> tensor<3xi32> + %2 = tosa.transpose %0, %1 : (tensor<64x1x1xf32>, tensor<3xi32>) -> tensor<1x64x1xf32> + %3 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x64x224xf32>}> : () -> tensor<1x64x224xf32> + %4 = tosa.add %3, %2 : (tensor<1x64x224xf32>, tensor<1x64x1xf32>) -> tensor<1x64x224xf32> + %5 = tosa.reshape %arg2 {new_shape = array<i64: 64, 3, 7>} : (tensor<1344xf32>) -> tensor<64x3x7xf32> + %6 = tosa.reshape %arg1 {new_shape = array<i64: 1, 3, 224>} : (tensor<672xf32>) -> tensor<1x3x224xf32> + %7 = "tosa.const"() <{value = dense<[0, 2, 1]> : tensor<3xi32>}> : () -> tensor<3xi32> + %8 = tosa.transpose %6, %7 : (tensor<1x3x224xf32>, tensor<3xi32>) -> tensor<1x224x3xf32> + %9 = tosa.transpose %5, %7 : (tensor<64x3x7xf32>, tensor<3xi32>) -> tensor<64x7x3xf32> + %10 = tosa.reshape %8 {new_shape = array<i64: 1, 224, 1, 3>} : (tensor<1x224x3xf32>) -> tensor<1x224x1x3xf32> + %11 = tosa.reshape %9 {new_shape = array<i64: 64, 7, 1, 3>} : (tensor<64x7x3xf32>) -> tensor<64x7x1x3xf32> + %12 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<64xf32>}> : () -> tensor<64xf32> + %13 = tosa.conv2d %10, %11, %12 {dilation = array<i64: 1, 1>, group = 1 : i64, pad = array<i64: 3, 3, 0, 0>, stride = array<i64: 1, 1>} : (tensor<1x224x1x3xf32>, tensor<64x7x1x3xf32>, tensor<64xf32>) -> tensor<1x224x1x64xf32> + %14 = tosa.reshape %13 {new_shape = array<i64: 1, 224, 64>} : (tensor<1x224x1x64xf32>) -> tensor<1x224x64xf32> + %15 = tosa.transpose %14, %7 : (tensor<1x224x64xf32>, tensor<3xi32>) -> tensor<1x64x224xf32> + %16 = tosa.add %15, %4 : (tensor<1x64x224xf32>, tensor<1x64x224xf32>) -> tensor<1x64x224xf32> + %17 = tosa.reshape %16 {new_shape = array<i64: 14336>} : (tensor<1x64x224xf32>) -> tensor<14336xf32> + return %17 : tensor<14336xf32> +} + // ----- // CHECK-LABEL: mlir_dot_transpose_add diff --git a/mlir/test/Dialect/MIGraphX/realize-int4.mlir b/mlir/test/Dialect/MIGraphX/realize-int4.mlir index bf6d1637b2ef..ac682ef81b09 100644 --- a/mlir/test/Dialect/MIGraphX/realize-int4.mlir +++ b/mlir/test/Dialect/MIGraphX/realize-int4.mlir @@ -1,21 +1,30 @@ // RUN: rocmlir-opt -migraphx-realize-int4 --split-input-file %s | FileCheck %s -// CHECK-LABEL: @basic_signed +// CHECK-LABEL: @basic_signless // CHECK-SAME: (%[[x:.+]]: !migraphx.shaped<8x4xi4, 4x1>) -> !migraphx.shaped<8x4xi8, 4x1> -func.func @basic_signed(%x: !migraphx.shaped<8x2xi8, 2x1>) -> !migraphx.shaped<8x4xi8, 4x1> { +func.func @basic_signless(%x: !migraphx.shaped<8x2xi8, 2x1>) -> !migraphx.shaped<8x4xi8, 4x1> { // CHECK: %[[extended:.+]] = migraphx.convert %[[x]] : <8x4xi4, 4x1> to <8x4xi8, 4x1> // CHECK: return %[[extended]] - %y = migraphx.unpack %x {axis = 1 : i64, isUnsigned = false} : <8x2xi8, 2x1> -> <8x4xi8, 4x1> + %y = migraphx.unpack %x {axis = 1 : i64} : <8x2xi8, 2x1> -> <8x4xi8, 4x1> func.return %y : !migraphx.shaped<8x4xi8, 4x1> } +// CHECK-LABEL: @basic_signed +// CHECK-SAME: (%[[x:.+]]: !migraphx.shaped<8x4xsi4, 4x1>) -> !migraphx.shaped<8x4xsi8, 4x1> +func.func @basic_signed(%x: !migraphx.shaped<8x2xsi8, 2x1>) -> !migraphx.shaped<8x4xsi8, 4x1> { + // CHECK: %[[extended:.+]] = migraphx.convert %[[x]] : <8x4xsi4, 4x1> to <8x4xsi8, 4x1> + // CHECK: return %[[extended]] + %y = migraphx.unpack %x {axis = 1 : i64} : <8x2xsi8, 2x1> -> <8x4xsi8, 4x1> + func.return %y : !migraphx.shaped<8x4xsi8, 4x1> +} + // CHECK-LABEL: @basic_unsigned -// CHECK-SAME: (%[[x:.+]]: !migraphx.shaped<8x4xi4, 4x1>) -> !migraphx.shaped<8x4xi8, 4x1> -func.func @basic_unsigned(%x: !migraphx.shaped<8x2xi8, 2x1>) -> !migraphx.shaped<8x4xi8, 4x1> { - // CHECK: %[[extended:.+]] = migraphx.convert zero_extend %[[x]] : <8x4xi4, 4x1> to <8x4xi8, 4x1> 
+// CHECK-SAME: (%[[x:.+]]: !migraphx.shaped<8x4xui4, 4x1>) -> !migraphx.shaped<8x4xui8, 4x1> +func.func @basic_unsigned(%x: !migraphx.shaped<8x2xui8, 2x1>) -> !migraphx.shaped<8x4xui8, 4x1> { + // CHECK: %[[extended:.+]] = migraphx.convert %[[x]] : <8x4xui4, 4x1> to <8x4xui8, 4x1> // CHECK: return %[[extended]] - %y = migraphx.unpack %x {axis = 1 : i64, isUnsigned = true} : <8x2xi8, 2x1> -> <8x4xi8, 4x1> - func.return %y : !migraphx.shaped<8x4xi8, 4x1> + %y = migraphx.unpack %x {axis = 1 : i64} : <8x2xui8, 2x1> -> <8x4xui8, 4x1> + func.return %y : !migraphx.shaped<8x4xui8, 4x1> } // CHECK-LABEL: @transpose @@ -25,7 +34,7 @@ func.func @basic_unsigned(%x: !migraphx.shaped<8x2xi8, 2x1>) -> !migraphx.shaped // CHECK: migraphx.convert %[[transposed]] func.func @transposed(%x: !migraphx.shaped<9x2x8xi8, 16x1x2>) -> !migraphx.shaped<9x8x4xi8, 32x4x1> { %transposed = migraphx.transpose %x {permutation = [0, 2, 1]} : <9x2x8xi8, 16x1x2> -> <9x8x2xi8, 16x2x1> - %y = migraphx.unpack %transposed {axis = 2 : i64, isUnsigned = false} : <9x8x2xi8, 16x2x1> -> <9x8x4xi8, 32x4x1> + %y = migraphx.unpack %transposed {axis = 2 : i64} : <9x8x2xi8, 16x2x1> -> <9x8x4xi8, 32x4x1> return %y : !migraphx.shaped<9x8x4xi8, 32x4x1> } @@ -36,7 +45,7 @@ func.func @transposed(%x: !migraphx.shaped<9x2x8xi8, 16x1x2>) -> !migraphx.shape // CHECK: migraphx.convert %[[reshaped]] func.func @reshape_expand(%x: !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<9x2x8xi8, 16x8x1> { %reshaped = migraphx.reshape %x {dims = [9, 2, 4]} : <9x8xi8, 8x1> -> <9x2x4xi8, 8x4x1> - %y = migraphx.unpack %reshaped {axis = 2 : i64, isUnsigned = false} : <9x2x4xi8, 8x4x1> -> <9x2x8xi8, 16x8x1> + %y = migraphx.unpack %reshaped {axis = 2 : i64} : <9x2x4xi8, 8x4x1> -> <9x2x8xi8, 16x8x1> func.return %y : !migraphx.shaped<9x2x8xi8, 16x8x1> } @@ -47,7 +56,7 @@ func.func @reshape_expand(%x: !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped // CHECK: migraphx.convert %[[reshaped]] func.func @reshape_collapse(%x: !migraphx.shaped<9x2x4xi8, 8x4x1>) -> !migraphx.shaped<9x16xi8, 16x1> { %reshaped = migraphx.reshape %x {dims = [9, 8]} : <9x2x4xi8, 8x4x1> -> <9x8xi8, 8x1> - %y = migraphx.unpack %reshaped {axis = 1 : i64, isUnsigned = false} : <9x8xi8, 8x1> -> <9x16xi8, 16x1> + %y = migraphx.unpack %reshaped {axis = 1 : i64} : <9x8xi8, 8x1> -> <9x16xi8, 16x1> func.return %y : !migraphx.shaped<9x16xi8, 16x1> } @@ -58,6 +67,6 @@ func.func @reshape_collapse(%x: !migraphx.shaped<9x2x4xi8, 8x4x1>) -> !migraphx. 
// CHECK: migraphx.convert %[[mbcast]] func.func @multibroadcast(%x: !migraphx.shaped<1x4x1xi8, 1x1x1>) -> !migraphx.shaped<4x8x3xi8, 0x1x0> { %mbcast = migraphx.multibroadcast %x {out_lens = [4, 4, 3]} : <1x4x1xi8, 1x1x1> -> <4x4x3xi8, 0x1x0> - %y = migraphx.unpack %mbcast {axis = 1 : i64, isUnsigned = false} : <4x4x3xi8, 0x1x0> -> <4x8x3xi8, 0x1x0> + %y = migraphx.unpack %mbcast {axis = 1 : i64} : <4x4x3xi8, 0x1x0> -> <4x8x3xi8, 0x1x0> func.return %y : !migraphx.shaped<4x8x3xi8, 0x1x0> } diff --git a/mlir/test/fusion/pr-e2e/mixr-conv1d-small.mlir b/mlir/test/fusion/pr-e2e/mixr-conv1d-small.mlir new file mode 100644 index 000000000000..2e6b6e35f42b --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-conv1d-small.mlir @@ -0,0 +1,8 @@ +// RUN: rocmlir-gen -fut mlir_convolution_add --arch %arch --clone-harness %s | rocmlir-driver -kernel-pipeline=migraphx | rocmlir-driver -host-pipeline=migraphx,highlevel | rocmlir-gen -ph -rand 1 -rand_type float -fut mlir_convolution_add_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +func.func private @mlir_convolution_add(%arg0: !migraphx.shaped<1x64x224xf32, 0x1x0>, %arg1: !migraphx.shaped<1x3x224xf32, 672x224x1>, %arg2: !migraphx.shaped<64x3x7xf32, 21x7x1>) -> !migraphx.shaped<1x64x224xf32, 14336x224x1> { + %0 = migraphx.convolution %arg1, %arg2 {dilation = [1], group = 1 : i64, padding = [3, 3], padding_mode = 0 : i64, stride = [1]} : <1x3x224xf32, 672x224x1>, <64x3x7xf32, 21x7x1> -> <1x64x224xf32, 14336x224x1> + %1 = migraphx.add %0, %arg0 : <1x64x224xf32, 14336x224x1>, <1x64x224xf32, 0x1x0> -> <1x64x224xf32, 14336x224x1> + return %1 : !migraphx.shaped<1x64x224xf32, 14336x224x1> +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-sint8-negative-bias.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-sint8-negative-bias.mlir new file mode 100644 index 000000000000..aaefbbaf0f77 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-sint8-negative-bias.mlir @@ -0,0 +1,24 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [0, 3, -2, -1, -1, 4, -9, -1, -2, 3, -6, 0, -3, 2, -1, 4, 4, -10, 4, 1, -11, 2, -5, -8, -9, -8, -2, -1, 3, -9, -1, -9, 3, -1, -5, -1, -4, -11, -7, -8, 1, -7, 
-11, -7, -1, -5, -7, 3, -7] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xsi8, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg2 = migraphx.literal (dense<21> : tensor<1xui8>) : <1xui8, 1> + %arg3 = migraphx.literal (dense<0.1875> : tensor<1x1x7x7xf32>) : <1x1x7x7xf32, 7x7x7x1> + %arg4 = migraphx.literal (dense<-1> : tensor<1x1x7x7xsi8>) : <1x1x7x7xsi8, 7x7x7x1> + %arg5 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xui8, 1> -> <1x1x7x7xui8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xui8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg5 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + %4 = migraphx.quantizelinear %3, %arg3, %arg4 : <1x1x7x7xf32, 49x49x7x1>, <1x1x7x7xf32, 7x7x7x1>, !migraphx.shaped<1x1x7x7xsi8, 7x7x7x1> -> <1x1x7x7xsi8, 49x49x7x1> + return %4 : !migraphx.shaped<1x1x7x7xsi8, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-nobias.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-nobias.mlir new file mode 100644 index 000000000000..1108c388cb92 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-nobias.mlir @@ -0,0 +1,23 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [1, 4, 0, 0, 0, 5, 0, 0, 0, 4, 0, 1, 0, 3, 0, 5, 5, 0, 5, 2, 0, 3, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 
19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg2 = migraphx.literal (dense<21> : tensor<1xui8>) : <1xui8, 1> + %arg3 = migraphx.literal (dense<0.1875> : tensor<1x1x7x7xf32>) : <1x1x7x7xf32, 7x7x7x1> + %arg5 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xui8, 1> -> <1x1x7x7xui8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xui8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg5 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + %4 = migraphx.quantizelinear %3, %arg3 : <1x1x7x7xf32, 49x49x7x1>, <1x1x7x7xf32, 7x7x7x1> -> <1x1x7x7xui8, 49x49x7x1> + return %4 : !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-overflow.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-overflow.mlir new file mode 100644 index 000000000000..7b669f7c2b08 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-overflow.mlir @@ -0,0 +1,25 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [-4, -1, -6, -5, -5, -1, -13, -5, -6, -1, -10, -4, -7, -2, -5, -1, -1, -14, -1, -3, -15, -2, -9, -12, -13, -12, -6, -5, -1, -13, -5, -13, -1, -5, -9, -5, -8, -15, -11, -12, -3, -11, -15, -11, -5, -9, -11, -1, -11] +// COM: tests fail if they have no arguments, that's why we have %dummy +// COM: Note that the values print as negative because they are displayed as signed integers; TODO: fix this +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : 
<1xf32, 1> + %arg2 = migraphx.literal (dense<21> : tensor<1xui8>) : <1xui8, 1> + %arg3 = migraphx.literal (dense<0.1875> : tensor<1x1x7x7xf32>) : <1x1x7x7xf32, 7x7x7x1> + %arg4 = migraphx.literal (dense<251> : tensor<1x1x7x7xui8>) : <1x1x7x7xui8, 7x7x7x1> + %arg5 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xui8, 1> -> <1x1x7x7xui8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xui8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg5 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + %4 = migraphx.quantizelinear %3, %arg3, %arg4 : <1x1x7x7xf32, 49x49x7x1>, <1x1x7x7xf32, 7x7x7x1>, !migraphx.shaped<1x1x7x7xui8, 7x7x7x1> -> <1x1x7x7xui8, 49x49x7x1> + return %4 : !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-underflow.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-underflow.mlir new file mode 100644 index 000000000000..d5fb8198b45c --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-underflow.mlir @@ -0,0 +1,24 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [2, 5, 0, 1, 1, 6, 0, 1, 0, 5, 0, 2, 0, 4, 1, 6, 6, 0, 6, 3, 0, 4, 0, 0, 0, 0, 0, 1, 5, 0, 1, 0, 5, 1, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 5, 0] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg2 = migraphx.literal (dense<21> : tensor<1xui8>) : <1xui8, 1> + %arg3 = migraphx.literal (dense<0.1875> : tensor<1x1x7x7xf32>) : <1x1x7x7xf32, 7x7x7x1> + %arg4 = migraphx.literal (dense<1> : tensor<1x1x7x7xui8>) : <1x1x7x7xui8, 7x7x7x1> + %arg5 = migraphx.literal (dense<0.25> 
: tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xui8, 1> -> <1x1x7x7xui8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xui8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg5 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + %4 = migraphx.quantizelinear %3, %arg3, %arg4 : <1x1x7x7xf32, 49x49x7x1>, <1x1x7x7xf32, 7x7x7x1>, !migraphx.shaped<1x1x7x7xui8, 7x7x7x1> -> <1x1x7x7xui8, 49x49x7x1> + return %4 : !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8.mlir new file mode 100644 index 000000000000..a4235d2ec585 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8.mlir @@ -0,0 +1,24 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [19, 22, 17, 18, 18, 23, 10, 18, 17, 22, 13, 19, 16, 21, 18, 23, 23, 9, 23, 20, 8, 21, 14, 11, 10, 11, 17, 18, 22, 10, 18, 10, 22, 18, 14, 18, 15, 8, 12, 11, 20, 12, 8, 12, 18, 14, 12, 22, 12] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg2 = migraphx.literal (dense<21> : tensor<1xui8>) : <1xui8, 1> + %arg3 = migraphx.literal (dense<0.1875> : tensor<1x1x7x7xf32>) : <1x1x7x7xf32, 7x7x7x1> + %arg4 = migraphx.literal (dense<18> : tensor<1x1x7x7xui8>) : <1x1x7x7xui8, 7x7x7x1> + %arg5 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + 
%1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xui8, 1> -> <1x1x7x7xui8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xui8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg5 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + %4 = migraphx.quantizelinear %3, %arg3, %arg4 : <1x1x7x7xf32, 49x49x7x1>, <1x1x7x7xf32, 7x7x7x1>, !migraphx.shaped<1x1x7x7xui8, 7x7x7x1> -> <1x1x7x7xui8, 49x49x7x1> + return %4 : !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-sint8-negative-bias.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-sint8-negative-bias.mlir new file mode 100644 index 000000000000..ca68ac0ca17c --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-sint8-negative-bias.mlir @@ -0,0 +1,21 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [1.3125, 0.84375, 1.6875, 1.59375, 1.59375, 0.5625, 3.09375, 1.40625, 1.6875, 0.65625, 2.4375, 1.3125, 1.96875, 0.9375, 1.5, 0.5625, 0.5625, 3.1875, 0.5625, 1.03125, 3.375, 0.9375, 2.34375, 2.8125, 3.09375, 2.8125, 1.6875, 1.59375, 0.65625, 3.09375, 1.40625, 2.90625, 0.84375, 1.40625, 2.34375, 1.40625, 2.0625, 3.28125, 2.625, 2.8125, 1.03125, 2.53125, 3.375, 2.71875, 1.40625, 2.25, 2.625, 0.84375, 2.53125] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xf32, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[-7, -12, -3, -4, -4, -15, 12, -6, -3, -14, 5, -7, 0, -11, -5, -15, -15, 13, -15, -10, 15, -11, 4, 9, 12, 9, -3, -4, -14, 12, -6, 10, -12, -6, 4, -6, 1, 14, 7, 9, -10, 6, 15, 8, -6, 3, 7, -12, 6]> : tensor<49xsi8>) : <49xsi8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg2 = migraphx.literal (dense<-21> : tensor<1xsi8>) : <1xsi8, 1> + %arg3 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xsi8, 1> -> <1x1x7x7xsi8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xsi8, 1> -> <1x1x7x7xsi8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xsi8, 
49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xsi8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg3 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + return %3 : !migraphx.shaped<1x1x7x7xf32, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-uint8-nobias.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-uint8-nobias.mlir new file mode 100644 index 000000000000..8732e6857460 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-uint8-nobias.mlir @@ -0,0 +1,19 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [2.15625, 2.625, 1.78125, 1.875, 1.875, 2.90625, 0.375, 2.0625, 1.78125, 2.8125, 1.03125, 2.15625, 1.5, 2.53125, 1.96875, 2.90625, 2.90625, 0.28125, 2.90625, 2.4375, 0.09375, 2.53125, 1.125, 0.65625, 0.375, 0.65625, 1.78125, 1.875, 2.8125, 0.375, 2.0625, 0.5625, 2.625, 2.0625, 1.125, 2.0625, 1.40625, 0.1875, 0.84375, 0.65625, 2.4375, 0.9375, 0.09375, 0.75, 2.0625, 1.21875, 0.84375, 2.625, 0.9375] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xf32, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg3 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg3 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + return %3 : !migraphx.shaped<1x1x7x7xf32, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-uint8.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-uint8.mlir new file mode 100644 index 000000000000..72f8520e0723 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-uint8.mlir @@ -0,0 +1,21 
@@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [0.1875, 0.65625, -0.1875, -0.09375, -0.09375, 0.9375, -1.59375, 0.09375, -0.1875, 0.84375, -0.9375, 0.1875, -0.46875, 0.5625, 0, 0.9375, 0.9375, -1.6875, 0.9375, 0.46875, -1.875, 0.5625, -0.84375, -1.3125, -1.59375, -1.3125, -0.1875, -0.09375, 0.84375, -1.59375, 0.09375, -1.40625, 0.65625, 0.09375, -0.84375, 0.09375, -0.5625, -1.78125, -1.125, -1.3125, 0.46875, -1.03125, -1.875, -1.21875, 0.09375, -0.75, -1.125, 0.65625, -1.03125] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xf32, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg2 = migraphx.literal (dense<21> : tensor<1xui8>) : <1xui8, 1> + %arg3 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xui8, 1> -> <1x1x7x7xui8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xui8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg3 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + return %3 : !migraphx.shaped<1x1x7x7xf32, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16-1645.mlir b/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16-1645.mlir index 1b8d21c8f4e1..725e19176027 100644 --- a/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16-1645.mlir +++ b/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16-1645.mlir @@ -5,8 +5,8 @@ module { func.func @mlir_unpack_int4_1645(%arg0: !migraphx.shaped<2x2xi8, 2x1>, %arg1: !migraphx.shaped<2x2x1x1x1x1xf16, 2x1x1x1x1x1>, %arg2: !migraphx.shaped<2x1xi8, 1x1>, %arg3: !migraphx.shaped<2x4xf16, 4x1>) -> !migraphx.shaped<4x4xf16, 4x1> // attributes {arch = "gfx90a:sramecc+:xnack-", kernel = "mixr", num_cu = 110 : i64} { - %0 = migraphx.unpack %arg0 {axis = 
1 : i64, isUnsigned = false} : <2x2xi8, 2x1> -> <2x4xi8, 4x1> - %1 = migraphx.unpack %arg2 {axis = 1 : i64, isUnsigned = false} : <2x1xi8, 1x1> -> <2x2xi8, 2x1> + %0 = migraphx.unpack %arg0 {axis = 1 : i64} : <2x2xi8, 2x1> -> <2x4xi8, 4x1> + %1 = migraphx.unpack %arg2 {axis = 1 : i64} : <2x1xi8, 1x1> -> <2x2xi8, 2x1> %2 = migraphx.reshape %arg1 {dims = [2, 2, 1, 1, 1, 1, 1]} : <2x2x1x1x1x1xf16, 2x1x1x1x1x1> -> <2x2x1x1x1x1x1xf16, 2x1x1x1x1x1x1> %3 = migraphx.multibroadcast %2 {out_dyn_dims = [], out_lens = [2, 2, 1, 1, 1, 1, 2]} : <2x2x1x1x1x1x1xf16, 2x1x1x1x1x1x1> -> <2x2x1x1x1x1x2xf16, 2x1x1x1x1x1x0> %4 = migraphx.reshape %3 {dims = [2, 4]} : <2x2x1x1x1x1x2xf16, 2x1x1x1x1x1x0> -> <2x4xf16, 4x1> diff --git a/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16.mlir b/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16.mlir index 983930b4194e..927ddcfff760 100644 --- a/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16.mlir +++ b/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16.mlir @@ -9,7 +9,7 @@ func.func private @mlir_unpack_dequantizelinear_dot(%arg0: !migraphx.shaped<1x4x8xi8, 32x8x1>, %arg1: !migraphx.shaped<1x16x4xf16, 64x4x1>) -> !migraphx.shaped<1x4x4xf16, 16x4x1> { %0 = migraphx.literal (dense<[0.25]> : tensor<1xf16>) : <1xf16, 0> %1 = migraphx.multibroadcast %0 {out_dyn_dims = [], out_lens = [1, 5, 16]} : <1xf16, 0> -> <1x4x16xf16, 0x0x0> - %2 = migraphx.unpack %arg0 {axis = 2 : i64, isUnsigned = false} : <1x4x8xi8, 32x8x1> -> <1x4x16xi8, 64x16x1> + %2 = migraphx.unpack %arg0 {axis = 2 : i64} : <1x4x8xi8, 32x8x1> -> <1x4x16xi8, 64x16x1> %3 = migraphx.dequantizelinear %2, %1 : <1x4x16xi8, 64x16x1>, <1x4x16xf16, 0x0x0> -> <1x4x16xf16, 64x16x1> %4 = migraphx.dot %3, %arg1 : <1x4x16xf16, 64x16x1>, <1x16x4xf16, 64x4x1> -> <1x4x4xf16, 16x4x1> return %4 : !migraphx.shaped<1x4x4xf16, 16x4x1> diff --git a/mlir/test/fusion/pr-e2e/mixr-dot-uint4-f16-uint32.mlir b/mlir/test/fusion/pr-e2e/mixr-dot-uint4-f16-uint32.mlir new file mode 100644 index 000000000000..18e26007a90e --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dot-uint4-f16-uint32.mlir @@ -0,0 +1,28 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_unpack_uint4_f16_uint32 --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -ph -fut mlir_unpack_uint4_f16_uint32_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// COM: Runs the MIGraphX pipeline first to rewrite out the int4 +module { + func.func @mlir_unpack_uint4_f16_uint32(%arg0: !migraphx.shaped<2x2xui8, 2x1>, %arg1: !migraphx.shaped<2x2x1x1x1x1xf16, 2x1x1x1x1x1>, %arg2: !migraphx.shaped<2x1xui8, 1x1>, %arg3: !migraphx.shaped<2x4xf16, 4x1>) -> !migraphx.shaped<4x4xf16, 4x1> // attributes {arch = "gfx90a:sramecc+:xnack-", kernel = "mixr", num_cu = 110 : i64} + { + %0 = migraphx.unpack %arg0 {axis = 1 : i64} : <2x2xui8, 2x1> -> <2x4xui8, 4x1> + %1 = migraphx.unpack %arg2 {axis = 1 : i64} : <2x1xui8, 1x1> -> <2x2xui8, 2x1> + %2 = migraphx.reshape %arg1 {dims = [2, 2, 1, 1, 1, 1, 1]} : <2x2x1x1x1x1xf16, 2x1x1x1x1x1> -> <2x2x1x1x1x1x1xf16, 2x1x1x1x1x1x1> + %3 = 
migraphx.multibroadcast %2 {out_dyn_dims = [], out_lens = [2, 2, 1, 1, 1, 1, 2]} : <2x2x1x1x1x1x1xf16, 2x1x1x1x1x1x1> -> <2x2x1x1x1x1x2xf16, 2x1x1x1x1x1x0> + %4 = migraphx.reshape %3 {dims = [2, 4]} : <2x2x1x1x1x1x2xf16, 2x1x1x1x1x1x0> -> <2x4xf16, 4x1> + %5 = migraphx.reshape %1 {dims = [2, 2, 1]} : <2x2xui8, 2x1> -> <2x2x1xui8, 2x1x1> + %6 = migraphx.multibroadcast %5 {out_dyn_dims = [], out_lens = [2, 2, 2]} : <2x2x1xui8, 2x1x1> -> <2x2x2xui8, 2x1x0> + %7 = migraphx.reshape %6 {dims = [2, 4]} : <2x2x2xui8, 2x1x0> -> <2x4xui8, 4x1> + %8 = migraphx.dequantizelinear %0, %4, %7 : <2x4xui8, 4x1>, <2x4xf16, 4x1>, !migraphx.shaped<2x4xui8, 4x1> -> <2x4xf16, 4x1> + %9 = migraphx.transpose %8 {permutation = [1, 0]} : <2x4xf16, 4x1> -> <4x2xf16, 1x4> + %10 = migraphx.dot %9, %arg3 : <4x2xf16, 1x4>, <2x4xf16, 4x1> -> <4x4xf16, 4x1> + %11 = migraphx.relu %10 : <4x4xf16, 4x1> -> <4x4xf16, 4x1> + %12 = migraphx.convert %11 : <4x4xf16, 4x1> to <4x4xui32, 4x1> + %13 = migraphx.literal (dense<1> : tensor<1xui8>) : <1xui8, 0> + %14 = migraphx.multibroadcast %13 {out_dyn_dims = [], out_lens = [4, 4]} : <1xui8, 0> -> <4x4xui8, 0x0> + %15 = migraphx.literal (dense<1.0> : tensor<1xf16>) : <1xf16, 0> + %16 = migraphx.multibroadcast %15 {out_dyn_dims = [], out_lens = [4, 4]} : <1xf16, 0> -> <4x4xf16, 0x0> + %17 = migraphx.dequantizelinear %12, %16, %14 : <4x4xui32, 4x1>, <4x4xf16, 0x0>, !migraphx.shaped<4x4xui8, 0x0> -> <4x4xf16, 4x1> + return %17 : !migraphx.shaped<4x4xf16, 4x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dot-uint4-f16.mlir b/mlir/test/fusion/pr-e2e/mixr-dot-uint4-f16.mlir new file mode 100644 index 000000000000..c0af377c3ff4 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dot-uint4-f16.mlir @@ -0,0 +1,21 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_unpack_uint4_f16 --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -ph -fut mlir_unpack_uint4_f16_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// COM: Runs the MIGraphX pipeline first to rewrite out the int4 +module { + func.func @mlir_unpack_uint4_f16(%arg0: !migraphx.shaped<2x2xui8, 2x1>, %arg1: !migraphx.shaped<2x2x1x1x1x1xf16, 2x1x1x1x1x1>, %arg2: !migraphx.shaped<2x1xui8, 1x1>, %arg3: !migraphx.shaped<2x4xf16, 4x1>) -> !migraphx.shaped<4x4xf16, 4x1> // attributes {arch = "gfx90a:sramecc+:xnack-", kernel = "mixr", num_cu = 110 : i64} + { + %0 = migraphx.unpack %arg0 {axis = 1 : i64} : <2x2xui8, 2x1> -> <2x4xui8, 4x1> + %1 = migraphx.unpack %arg2 {axis = 1 : i64} : <2x1xui8, 1x1> -> <2x2xui8, 2x1> + %2 = migraphx.reshape %arg1 {dims = [2, 2, 1, 1, 1, 1, 1]} : <2x2x1x1x1x1xf16, 2x1x1x1x1x1> -> <2x2x1x1x1x1x1xf16, 2x1x1x1x1x1x1> + %3 = migraphx.multibroadcast %2 {out_dyn_dims = [], out_lens = [2, 2, 1, 1, 1, 1, 2]} : <2x2x1x1x1x1x1xf16, 2x1x1x1x1x1x1> -> <2x2x1x1x1x1x2xf16, 2x1x1x1x1x1x0> + %4 = migraphx.reshape %3 {dims = [2, 4]} : <2x2x1x1x1x1x2xf16, 2x1x1x1x1x1x0> -> <2x4xf16, 4x1> + %5 = migraphx.reshape %1 {dims = [2, 2, 1]} : <2x2xui8, 2x1> -> <2x2x1xui8, 2x1x1> + %6 = 
migraphx.multibroadcast %5 {out_dyn_dims = [], out_lens = [2, 2, 2]} : <2x2x1xui8, 2x1x1> -> <2x2x2xui8, 2x1x0> + %7 = migraphx.reshape %6 {dims = [2, 4]} : <2x2x2xui8, 2x1x0> -> <2x4xui8, 4x1> + %8 = migraphx.dequantizelinear %0, %4, %7 : <2x4xui8, 4x1>, <2x4xf16, 4x1>, !migraphx.shaped<2x4xui8, 4x1> -> <2x4xf16, 4x1> + %9 = migraphx.transpose %8 {permutation = [1, 0]} : <2x4xf16, 4x1> -> <4x2xf16, 1x4> + %10 = migraphx.dot %9, %arg3 : <4x2xf16, 1x4>, <2x4xf16, 4x1> -> <4x4xf16, 4x1> + return %10 : !migraphx.shaped<4x4xf16, 4x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-uint-division.mlir b/mlir/test/fusion/pr-e2e/mixr-uint-division.mlir new file mode 100644 index 000000000000..b02d64b7383e --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-uint-division.mlir @@ -0,0 +1,24 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut migraphx_div_ui32 --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -ph -fut migraphx_div_ui32_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +module { + + func.func @migraphx_div_ui32(%arg0: !migraphx.shaped<1x2048x1x1xf32, 2048x1x1x1>, %arg1: !migraphx.shaped<1x2048x1x1xf32, 2048x1x1x1>, %arg2: !migraphx.shaped<1x2048x7x7xf32, 100352x49x7x1>, %arg3: !migraphx.shaped<1x2048x1x1xf32, 2048x1x1x1>, %arg4: !migraphx.shaped<1x1024x14x14xf32, 200704x196x14x1>, %arg5: !migraphx.shaped<2048x1024x1x1xf32, 1024x1x1x1>) -> !migraphx.shaped<1x2048x7x7xui32, 100352x49x7x1> { + %0 = migraphx.multibroadcast %arg3 {out_dyn_dims = [], out_lens = [1, 2048, 7, 7]} : <1x2048x1x1xf32, 2048x1x1x1> -> <1x2048x7x7xf32, 0x1x0x0> + %1 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 2048, 7, 7]} : <1x2048x1x1xf32, 2048x1x1x1> -> <1x2048x7x7xf32, 0x1x0x0> + %2 = migraphx.multibroadcast %arg0 {out_dyn_dims = [], out_lens = [1, 2048, 7, 7]} : <1x2048x1x1xf32, 2048x1x1x1> -> <1x2048x7x7xf32, 0x1x0x0> + %3 = migraphx.convolution %arg4, %arg5 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [2, 2]} : <1x1024x14x14xf32, 200704x196x14x1>, <2048x1024x1x1xf32, 1024x1x1x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %4 = migraphx.mul %2, %3 : <1x2048x7x7xf32, 0x1x0x0>, <1x2048x7x7xf32, 100352x49x7x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %5 = migraphx.mul %1, %4 : <1x2048x7x7xf32, 0x1x0x0>, <1x2048x7x7xf32, 100352x49x7x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %6 = migraphx.mul %2, %arg2 : <1x2048x7x7xf32, 0x1x0x0>, <1x2048x7x7xf32, 100352x49x7x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %7 = migraphx.mul %1, %6 : <1x2048x7x7xf32, 0x1x0x0>, <1x2048x7x7xf32, 100352x49x7x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %8 = migraphx.add %7, %5 : <1x2048x7x7xf32, 100352x49x7x1>, <1x2048x7x7xf32, 100352x49x7x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %9 = migraphx.add %8, %0 : <1x2048x7x7xf32, 100352x49x7x1>, <1x2048x7x7xf32, 0x1x0x0> -> <1x2048x7x7xf32, 100352x49x7x1> + %10 = migraphx.literal (dense<[2]> : tensor<1xui32>) : <1xui32, 0> + %11 = migraphx.multibroadcast %10 {out_dyn_dims = [], out_lens = [1, 2048, 7, 7]} : 
<1xui32, 0> -> <1x2048x7x7xui32, 0x0x0x0> + %12 = migraphx.relu %9 : <1x2048x7x7xf32, 100352x49x7x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %13 = migraphx.convert %12 : <1x2048x7x7xf32, 100352x49x7x1> to <1x2048x7x7xui32, 100352x49x7x1> + %14 = migraphx.div %13, %11 : <1x2048x7x7xui32, 100352x49x7x1>, <1x2048x7x7xui32, 0x0x0x0> -> <1x2048x7x7xui32, 100352x49x7x1> + return %14 : !migraphx.shaped<1x2048x7x7xui32, 100352x49x7x1> + } +}
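
// COM: For reference, a minimal sketch (not part of the patch) of the lowering contract the
// COM: tests above exercise: MIXRShapedToTensorConverter strips signedness, so ui32 becomes
// COM: signless i32 before TOSA, and any op whose behavior differs between signed and unsigned
// COM: (division here; casts via "unsigned_cast" elsewhere) is carried through a rocmlir
// COM: tosa.custom op that --rocmlir-custom-tosa-to-linalg rewrites to the matching arith op.
// COM: The function name below is hypothetical; the tosa.custom form mirrors @unsigned_div in
// COM: rocmlir-custom-tosa-to-linalg.mlir.
func.func @unsigned_div_sketch(%a: tensor<4xi32>, %b: tensor<4xi32>) -> tensor<4xi32> {
  // Signless i32 can no longer express "unsigned"; the semantics survive in operator_name.
  %q = tosa.custom %a, %b {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_div"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
  // RocmlirCustomTosaToLinalg later lowers this to a linalg.generic whose body is arith.divui.
  return %q : tensor<4xi32>
}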