diff --git a/mlir/include/mlir/Dialect/MIGraphX/IR/MIGraphX.td b/mlir/include/mlir/Dialect/MIGraphX/IR/MIGraphX.td index 79c0b35d0ca4..1d050d408618 100644 --- a/mlir/include/mlir/Dialect/MIGraphX/IR/MIGraphX.td +++ b/mlir/include/mlir/Dialect/MIGraphX/IR/MIGraphX.td @@ -98,7 +98,7 @@ def MIGraphX_ClipOp : // Keep that logic here. def MIGraphX_WhereOp : MIGraphX_Op<"where">, - Arguments<(ins MIXRShapedOf<[I8]>:$cond, + Arguments<(ins MIXRShapedOf<[I8, SI8, UI8]>:$cond, AnyMIXRShaped:$inA, AnyMIXRShaped:$inB)>, Results<(outs AnyMIXRShaped:$output)> { @@ -117,18 +117,14 @@ def MIGraphX_ConvertOp : MIGraphX_Op<"convert">, - Arguments<(ins AnyMIXRShaped:$inA, UnitAttr:$zeroExtend)>, + Arguments<(ins AnyMIXRShaped:$inA)>, Results<(outs AnyMIXRShaped:$output)> { let summary = "Elementwise type conversion"; let description = [{ Type conversion. Due to impedance mismatches between MIGraphX and Tosa, currently only supports float to float conversions. - - If zeroExtend is set, the input is treated as an unsigned integer. - This is MLIR-specific, since MIGraphX encodes integer signedness in types - but MLIR generally uses signless integers. }]; - let assemblyFormat = "(`zero_extend` $zeroExtend^)? $inA attr-dict `:` type($inA) `to` type($output)"; + let assemblyFormat = "$inA attr-dict `:` type($inA) `to` type($output)"; } class MIGraphX_ElementwiseUnaryOp<string mnemonic, list<Trait> traits=[]> : @@ -181,10 +177,9 @@ def MIGraphX_TanhOp : // int4 operations def MIGraphX_UnpackOp : MIGraphX_Op<"unpack">, - Arguments<(ins MIXRShapedOf<[I8, I<4>]>:$in, - I64Attr:$axis, - BoolAttr:$isUnsigned)>, - Results<(outs MIXRShapedOf<[I8, I<4>]>:$out)> { + Arguments<(ins MIXRShapedOf<[I8, UI8, SI8, I<4>, SI<4>, UI<4>]>:$in, + I64Attr:$axis)>, + Results<(outs MIXRShapedOf<[I8, UI8, SI8, I<4>, SI<4>, UI<4>]>:$out)> { let summary = "Unpack int4 values stored as bytes"; let description = [{ Given a shaped tensor of bytes, double the length of `axis` by @@ -201,9 +196,6 @@ def MIGraphX_UnpackOp : MIGraphX_Op<"unpack">, the corresponding tensor of i8 (in which case, the `i4` are exposed as an extra dimension and not flattened) or another tensor of i4. This allows us to progressively move unpack up to function boundaries. - - If `isUnsigned` is true, the inputs are a buffer of unsigned 4-bit ints, - otherwise they are signed. }]; let assemblyFormat = [{ $in attr-dict `:` type($in) `->` type($out) }]; @@ -333,7 +325,7 @@ class MIGraphX_ConvOpBase<string mnemonic, list<Type> inputTypes=[], list<Type> } def MIGraphX_QuantConvolutionOp : - MIGraphX_ConvOpBase<"quant_convolution", [F8E4M3FNUZ, F8E5M2FNUZ, F8E5M2, F8E4M3FN, I8], [F32, I32]> { + MIGraphX_ConvOpBase<"quant_convolution", [F8E4M3FNUZ, F8E5M2FNUZ, F8E5M2, F8E4M3FN, I8, SI8], [F32, I32, SI32]> { let summary = "quantized convolution forward"; let description = [{ The `migraphx.quant_convolution` op computes quantized convolution forward. @@ -510,7 +502,7 @@ class MIGraphX_DotOpBase<string mnemonic, list<Type> inputTypes=[], list<Type> o } def MIGraphX_QuantDotOp : - MIGraphX_DotOpBase<"quant_dot", [F8E4M3FNUZ, F8E5M2FNUZ, F8E4M3FN, F8E5M2, I8], [F32, I32]>{ + MIGraphX_DotOpBase<"quant_dot", [F8E4M3FNUZ, F8E5M2FNUZ, F8E4M3FN, F8E5M2, I8, SI8], [F32, I32, SI32]>{ let summary = "Dot product of quantized tensors"; let description = [{ The `migraphx.quant_dot` op computes the dot product of two tensors.
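Taken together, the .td changes above move signedness out of attributes (zeroExtend on convert, isUnsigned on unpack) and into the !migraphx.shaped element type itself (si8/ui8, si4/ui4, si32, and so on). A minimal sketch of the resulting IR, following the assembly syntax of the tests later in this patch (the function name and shapes here are illustrative, not taken from the change):

// Sketch: unsigned int4 unpack with no isUnsigned attribute; the ui8 element
// type carries the signedness, mirroring the realize-int4 tests below.
func.func @unsigned_unpack_sketch(%x: !migraphx.shaped<8x2xui8, 2x1>) -> !migraphx.shaped<8x4xui8, 4x1> {
  %y = migraphx.unpack %x {axis = 1 : i64} : <8x2xui8, 2x1> -> <8x4xui8, 4x1>
  return %y : !migraphx.shaped<8x4xui8, 4x1>
}
// A convert that previously needed `zero_extend` is now implied by ui4 -> ui8:
// %c = migraphx.convert %u : <16xui4, 1> to <16xui8, 1>

The unsigned element types survive until MIGraphXToTosa, where the type converter erases them to signless integers and routes unsigned-sensitive ops through the rocmlir "unsigned_cast"/"unsigned_div" tosa.custom ops, as the conversion code below shows.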
diff --git a/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td b/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td index 82579d9db0b9..a8944d4f65e8 100644 --- a/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td +++ b/mlir/include/mlir/Dialect/Rock/IR/RockAttrDefs.td @@ -472,11 +472,6 @@ def Rock_EnableSplitKForTuning : Rock_Attr<"EnableSplitKForTuning"> { let mnemonic = "enable_splitk_for_tuning"; } -// Used when converting 1-dimensional migraphx.convolution to tosa.conv2d -def Rock_ExpandedFrom1D : Rock_Attr<"ExpandedFrom1D"> { - let mnemonic = "expanded_from_1d"; -} - def Rock_PrefillAttr : Rock_Attr<"Prefill"> { let mnemonic = "rock.prefill"; diff --git a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp index f1b256998be7..7cd63080c529 100644 --- a/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp +++ b/mlir/lib/Conversion/MIGraphXToTosa/MIGraphXToTosa.cpp @@ -41,8 +41,27 @@ using mlir::migraphx::MIXRShapedType; //===----------------------------------------------------------------------===// migraphx::MIXRShapedToTensorConverter::MIXRShapedToTensorConverter() { - addConversion([](Type type) { return type; }); - addConversion([](MIXRShapedType shaped) { return shaped.asTensor(); }); + addConversion([](Type type) { + if (type.isInteger() && !type.isSignlessInteger()) { + type = IntegerType::get(type.getContext(), type.getIntOrFloatBitWidth(), + IntegerType::SignednessSemantics::Signless); + } + return type; + }); + addConversion([](MIXRShapedType shaped) { + RankedTensorType newType = shaped.asTensor(); + Type elementType = newType.getElementType(); + + // Convert to signless if the element type is a signed integer + if (elementType.isInteger() && !elementType.isSignlessInteger()) { + elementType = IntegerType::get( + shaped.getContext(), elementType.getIntOrFloatBitWidth(), + IntegerType::SignednessSemantics::Signless); + // Create a new tensor type with the signless element type + newType = RankedTensorType::get(newType.getShape(), elementType); + } + return newType; + }); addSourceMaterialization([](OpBuilder &b, MIXRShapedType shapedResType, ValueRange tensorResult, @@ -92,13 +111,28 @@ static TosaOp createOpAndInfer(PatternRewriter &rewriter, Location loc, return op; } -static tosa::CastOp createCastOp(PatternRewriter &rewriter, Location loc, - Type resElementType, Value input) { - ShapedType inputType = cast(input.getType()); - Type resType = inputType.cloneWith({}, resElementType); - - auto op = rewriter.create(loc, resType, input); - return op; +static Value createCastOp(PatternRewriter &rewriter, Location loc, + Type resElementType, Value input, Type inputType, + Type resElementTypeBeforeConvert = nullptr) { + ShapedType shapedInputType = cast(input.getType()); + Type resType = shapedInputType.cloneWith({}, resElementType); + + if (!resElementTypeBeforeConvert) + resElementTypeBeforeConvert = resElementType; + + Value res; + if (inputType.isUnsignedInteger() || + resElementTypeBeforeConvert.isUnsignedInteger()) { + assert(!inputType.isSignedInteger() && + !resElementTypeBeforeConvert.isSignedInteger()); + res = rewriter + .create(loc, resType, "unsigned_cast", "rocmlir", + "", input) + .getResult(0); + } else { + res = rewriter.create(loc, resType, input).getResult(); + } + return res; } static Type getShapedElementTy(Value v) { @@ -181,6 +215,7 @@ namespace { template struct ConvConverter final : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; + using 
OpConversionPattern::getTypeConverter; using OpAdaptor = typename OpConversionPattern::OpAdaptor; // Note, this lowering pattern works for both migraphx.convolution and @@ -215,9 +250,14 @@ LogicalResult ConvConverter::matchAndRewrite( ValueRange results = op->getResults(); Type elementTy = inputType.getElementType(); auto outputTy = cast(results[0].getType()); + Type outElementTy = outputTy.getElementType(); + Type newOutElementTy = getTypeConverter()->convertType(outElementTy); SmallVector NCHW2NHWC{0, 2, 3, 1}; SmallVector NHWC2NCHW{0, 3, 1, 2}; + if (outElementTy.isUnsignedInteger()) + return op.emitError("No support for unsigned convolution.\n"); + int dims = outputTy.getShape().size() - 2; SmallVector toChannelLast{0}; SmallVector fromChannelLast{0, dims + 1}; @@ -237,11 +277,10 @@ LogicalResult ConvConverter::matchAndRewrite( for (int i = 0; i < dims; i++) newShape.push_back(outShape[i + 2]); newShape.push_back(outShape[1]); - Type newOutTy = RankedTensorType::get(newShape, outputTy.getElementType()); + Type newOutTy = RankedTensorType::get(newShape, newOutElementTy); // There is no tosa.conv1d, so instead we'll add a dummy x1 dimension - // to the input tensors, and make a tosa.conv2d. We'll also add the - // ExpandedFrom1D attribute so we can undo it in tosa-to-rock. + // to the input tensors, and make a tosa.conv2d. auto expandTo2D = [&rewriter, loc](mlir::Value value) { ArrayRef origShape = cast(value.getType()).getShape(); SmallVector expShape(origShape.drop_back()); @@ -259,7 +298,7 @@ LogicalResult ConvConverter::matchAndRewrite( case 1: // Expand to do a conv2d, because there's no conv1d op. newShape.insert(std::prev(newShape.end()), 1); - new1DOutTy = RankedTensorType::get(newShape, outputTy.getElementType()); + new1DOutTy = RankedTensorType::get(newShape, newOutElementTy); input = expandTo2D(input); filter = expandTo2D(filter); @@ -267,11 +306,9 @@ LogicalResult ConvConverter::matchAndRewrite( loc, new1DOutTy, ValueRange{ input, filter, - getZeroTensor(loc, outputTy.getElementType(), + getZeroTensor(loc, newOutElementTy, cast(filter.getType()).getShape()[0], rewriter)}); - cop->setAttr(rock::ExpandedFrom1DAttr::getMnemonic(), - rewriter.getUnitAttr()); break; case 2: @@ -279,7 +316,7 @@ LogicalResult ConvConverter::matchAndRewrite( loc, newOutTy, ValueRange{ input, filter, - getZeroTensor(loc, outputTy.getElementType(), + getZeroTensor(loc, newOutElementTy, cast(filter.getType()).getShape()[0], rewriter)}); break; @@ -288,7 +325,7 @@ LogicalResult ConvConverter::matchAndRewrite( loc, newOutTy, ValueRange{ input, filter, - getZeroTensor(loc, outputTy.getElementType(), + getZeroTensor(loc, newOutElementTy, cast(filter.getType()).getShape()[0], rewriter)}); break; @@ -326,7 +363,7 @@ LogicalResult ConvConverter::matchAndRewrite( return op->emitError( "1-D convolution has improper dilation, stride, or pad."); } - dilations.push_back(0); + dilations.push_back(1); strides.push_back(1); pads.push_back(0); pads.push_back(0); @@ -377,12 +414,17 @@ LogicalResult DotConverter::matchAndRewrite( auto results = op->getResults(); Type elementTy = inA.getType().getElementType(); auto origOutputTy = cast(results[0].getType()); + Type outElementTy = origOutputTy.getElementType(); + Type newOutElementTy = getTypeConverter()->convertType(outElementTy); + + if (outElementTy.isUnsignedInteger()) + return op.emitError("No support for unsigned dot product.\n"); // check batch dimension. 
Tosa matmul only allow a single dimension for it, // add reshape ops to flatten and restore the original dimension. ArrayRef origOutDims = origOutputTy.getShape(); RankedTensorType newOutType = - RankedTensorType::get(origOutDims, origOutputTy.getElementType()); + RankedTensorType::get(origOutDims, newOutElementTy); size_t outRank = origOutDims.size(); ArrayRef orgDimsA = inA.getType().getShape(); ArrayRef orgDimsB = inB.getType().getShape(); @@ -426,8 +468,7 @@ LogicalResult DotConverter::matchAndRewrite( } RankedTensorType newAType = RankedTensorType::get(newDimsA, elementTy); RankedTensorType newBType = RankedTensorType::get(newDimsB, elementTy); - newOutType = - RankedTensorType::get(newDimsOut, origOutputTy.getElementType()); + newOutType = RankedTensorType::get(newDimsOut, newOutElementTy); auto reshapeAOp = rewriter.create( loc, newAType, inA, rewriter.getDenseI64ArrayAttr(newDimsA)); auto reshapeBOp = rewriter.create( @@ -527,6 +568,7 @@ BroadcastConverter::matchAndRewrite(migraphx::BroadcastOp op, OpAdaptor adaptor, ArrayRef outShape = op.getOutput().getType().getShape(); uint32_t outRank = op.getOutput().getType().getRank(); Type elemType = op.getOutput().getType().getElementType(); + Type newOutElementTy = getTypeConverter()->convertType(elemType); auto axis = static_cast(cast(op->getAttr("axis")).getInt()); @@ -539,15 +581,15 @@ BroadcastConverter::matchAndRewrite(migraphx::BroadcastOp op, OpAdaptor adaptor, } } tosa::ReshapeOp sameRankReshapedOp = createOpAndInfer( - rewriter, loc, elemType, adaptor.getInput(), + rewriter, loc, newOutElementTy, adaptor.getInput(), rewriter.getDenseI64ArrayAttr(newShape)); - auto outType = RankedTensorType::get(outShape, elemType); + auto outType = RankedTensorType::get(outShape, newOutElementTy); // We create a dummy zero addition with implicit broadcasting // because tosa does not have an explicit broadcast op auto zeroTensor = getZeroTensor(loc, outType, rewriter); auto addWithZero = createOpAndInfer( - rewriter, loc, elemType, zeroTensor, sameRankReshapedOp); + rewriter, loc, newOutElementTy, zeroTensor, sameRankReshapedOp); rewriter.replaceOp(op, addWithZero); return success(); @@ -792,8 +834,21 @@ DivConverter::matchAndRewrite(migraphx::DivOp op, OpAdaptor adaptor, auto inBTensor = cast>(adaptor.getInB()); Type elementType = inATensor.getType().getElementType(); if (isa(elementType)) { - Value div = createOpAndInfer(rewriter, loc, elementType, - inATensor, inBTensor); + auto origAElementType = op.getInA().getType().getElementType(); + auto origBElementType = op.getInB().getType().getElementType(); + Value div; + if (origAElementType.isUnsignedInteger() || + origBElementType.isUnsignedInteger()) { + if (origAElementType != origBElementType) + return op->emitError("Types of A and B must be the same"); + mlir::SmallVector inputs = {inATensor, inBTensor}; + auto op = rewriter.create( + loc, inATensor.getType(), "unsigned_div", "rocmlir", "", inputs); + div = op->getResult(0); + } else { + div = createOpAndInfer(rewriter, loc, elementType, + inATensor, inBTensor); + } rewriter.replaceOp(op, div); return success(); } @@ -884,12 +939,17 @@ LogicalResult DeQuantizeLinearConverter::matchAndRewrite( Value output = op.getOutput(); Location loc = op->getLoc(); - Type outputType = getShapedElementTy(output); - Value upcastInput = createCastOp(rewriter, loc, outputType, input); + Type origOutputType = getShapedElementTy(output); + Type outputType = getTypeConverter()->convertType(origOutputType); + Value upcastInput = + createCastOp(rewriter, loc, 
outputType, input, + op.getInput().getType().getElementType(), origOutputType); Value shifted = upcastInput; if (auto bias = adaptor.getBias()) { - Value upcastBias = createCastOp(rewriter, loc, outputType, bias); + Value upcastBias = + createCastOp(rewriter, loc, outputType, bias, + op.getBias().getType().getElementType(), origOutputType); shifted = createOpAndInfer<tosa::SubOp>(rewriter, loc, outputType, upcastInput, upcastBias); } @@ -920,7 +980,8 @@ LogicalResult QuantizeLinearConverter::matchAndRewrite( Value scaled = createOpAndInfer<tosa::MulOp>( rewriter, loc, elementType, input, inverseScale, /*shift=*/0); - Type outputType = getShapedElementTy(output); + Type origOutputType = getShapedElementTy(output); + Type outputType = getTypeConverter()->convertType(origOutputType); // If there is a bias, we upcast to the larger of the bias type and int32_t // or float (which is what the bias type is in dequantize, the MLIR // quantization implementation, and other ML frameworks) and then do a @@ -929,17 +990,20 @@ Type biasType = outputType; if (bias) { biasType = getShapedElementTy(bias); - if (biasType.getIntOrFloatBitWidth() < 32) { - biasType = isa<IntegerType>(biasType) ? cast<Type>(rewriter.getI32Type()) - : cast<Type>(rewriter.getF32Type()); - bias = createCastOp(rewriter, loc, biasType, bias); - } } - Value asShort = createCastOp(rewriter, loc, biasType, scaled); + if ((bias || origOutputType != outputType) && + biasType.getIntOrFloatBitWidth() < 32) { + biasType = isa<IntegerType>(biasType) ? cast<Type>(rewriter.getI32Type()) + : cast<Type>(rewriter.getF32Type()); + } + Value asShort = createCastOp(rewriter, loc, biasType, scaled, elementType); Value biased = asShort; - if (bias) + if (bias) { + bias = createCastOp(rewriter, loc, biasType, bias, + op.getBias().getType().getElementType()); biased = createOpAndInfer<tosa::AddOp>(rewriter, loc, biasType, asShort, bias); + } Value result = biased; if (biasType != outputType) { @@ -961,20 +1025,28 @@ LogicalResult QuantizeLinearConverter::matchAndRewrite( minI = APInt(64, (int64_t)(minF.convertToFloat())); maxI = APInt(64, (int64_t)(maxF.convertToFloat())); } else { - minI = APInt::getSignedMinValue(width); - maxI = APInt::getSignedMaxValue(width); - minF.convertFromAPInt(minI, /*IsSigned=*/true, + minI = origOutputType.isUnsignedInteger() + ? APInt::getMinValue(width) + : APInt::getSignedMinValue(width); + maxI = origOutputType.isUnsignedInteger() + ? APInt::getMaxValue(width) + : APInt::getSignedMaxValue(width); + minF.convertFromAPInt(minI, /*IsSigned=*/origOutputType.isSignedInteger(), APFloat::rmNearestTiesToEven); - maxF.convertFromAPInt(maxI, /*IsSigned=*/true, + maxF.convertFromAPInt(maxI, /*IsSigned=*/origOutputType.isSignedInteger(), APFloat::rmNearestTiesToEven); } FloatAttr minFatt = rewriter.getFloatAttr(rewriter.getF32Type(), minF); FloatAttr maxFatt = rewriter.getFloatAttr(rewriter.getF32Type(), maxF); - result = createOpAndInfer( - rewriter, loc, biasType, result, minI.getSExtValue(), - maxI.getSExtValue(), minFatt, maxFatt); - result = createCastOp(rewriter, loc, outputType, result); + auto minVal = origOutputType.isUnsignedInteger() ? minI.getZExtValue() + : minI.getSExtValue(); + auto maxVal = origOutputType.isUnsignedInteger() ?
maxI.getZExtValue() + : maxI.getSExtValue(); + result = createOpAndInfer(rewriter, loc, biasType, result, + minVal, maxVal, minFatt, maxFatt); + result = createCastOp(rewriter, loc, outputType, result, biasType, + origOutputType); } rewriter.replaceOp(op, result); @@ -984,21 +1056,29 @@ LogicalResult QuantizeLinearConverter::matchAndRewrite( LogicalResult ConvertConverter::matchAndRewrite(migraphx::ConvertOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { - if (!op.getZeroExtend()) { + + auto inputType = op.getInA().getType().getElementType(); + auto outputType = op.getResult().getType().getElementType(); + if (inputType.isUnsignedInteger() || outputType.isUnsignedInteger()) { + assert(!inputType.isSignedInteger() && !outputType.isSignedInteger()); + rewriter.replaceOpWithNewOp( + op, getTypeConverter()->convertType(op.getResult().getType()), + "unsigned_cast", "rocmlir", "", adaptor.getInA()); + } else { rewriter.replaceOpWithNewOp( op, getTypeConverter()->convertType(op.getResult().getType()), adaptor.getInA()); - return success(); } - rewriter.replaceOpWithNewOp( - op, getTypeConverter()->convertType(op.getResult().getType()), - "unsigned_cast", "rocmlir", "", adaptor.getInA()); return success(); } LogicalResult NegConverter::matchAndRewrite(migraphx::NegOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { + auto outElementType = op.getResult().getType().getElementType(); + if (outElementType.isUnsignedInteger()) + return op.emitOpError("can't negate an unsigned int type"); + rewriter.replaceOpWithNewOp( op, getTypeConverter()->convertType(op.getResult().getType()), adaptor.getInA(), nullptr); @@ -1077,11 +1157,39 @@ LogicalResult LiteralConverter::matchAndRewrite(migraphx::LiteralOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { MIXRShapedType type = op.getResult().getType(); - RankedTensorType newType = type.asTensor(); + RankedTensorType newType = + cast(getTypeConverter()->convertType(type)); + ElementsAttr value = op.getValue(); - if (value.isSplat() && value.getType() != newType) - value = SplatElementsAttr::get(newType, value.getSplatValue()); - rewriter.replaceOpWithNewOp(op, type.asTensor(), value); + if (value.getType() != newType) { + if (value.isSplat()) { + // Get the original splat value (for example SI8 value) + Attribute splatValue = value.getSplatValue(); + + // Reinterpret the splatValue under the new type (for example SI8 -> I8), + // preserving bytes + Attribute newSplatValue; + if (auto intAttr = dyn_cast(splatValue)) + newSplatValue = + IntegerAttr::get(newType.getElementType(), intAttr.getValue()); + else if (auto floatAttr = dyn_cast(splatValue)) + newSplatValue = + FloatAttr::get(newType.getElementType(), floatAttr.getValue()); + else + return failure(); + + // Create the new SplatElementsAttr (for example I8 type) with preserved + // value bytes + value = SplatElementsAttr::get(newType, newSplatValue); + } else { + // Reinterpret existing values under the new type + auto originalAttr = cast(value); + value = DenseElementsAttr::get(newType, originalAttr.getRawData()); + } + } + + // Replace with the new operation using the updated tensor type + rewriter.replaceOpWithNewOp(op, newType, value); return success(); } @@ -1107,7 +1215,8 @@ WhereConverter::matchAndRewrite(migraphx::WhereOp op, OpAdaptor adaptor, Value rawCond = adaptor.getCond(); Value inA = adaptor.getInA(); Value inB = adaptor.getInB(); - Value cond = createCastOp(rewriter, loc, rewriter.getI1Type(), rawCond); + Value cond = 
createCastOp(rewriter, loc, rewriter.getI1Type(), rawCond, + op.getCond().getType().getElementType()); rewriter.replaceOpWithNewOp( op, getTypeConverter()->convertType(op.getResult().getType()), cond, inA, inB); @@ -1275,27 +1384,26 @@ LogicalResult MHALLaunchConverter::matchAndRewrite( void migraphx::populateMIGraphXToTosaConversionPatterns( RewritePatternSet &patterns, TypeConverter &typeConverter) { - patterns - .add, ConvConverter, - DotConverter, DotConverter, BroadcastConverter, - MultiBroadcastConverter, TransposeConverter, ReshapeConverter, - SliceConverter, ReduceMeanConverter, ReduceSumConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, DivConverter, MulConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, - TrivialConverter, DeQuantizeLinearConverter, - QuantizeLinearConverter, DeQuantizeLinearConverter, ConvertConverter, - NegConverter, ReluConverter, SoftmaxConverter, LiteralConverter, - ClipConverter, WhereConverter>(typeConverter, patterns.getContext()); + patterns.add, ConvConverter, + DotConverter, DotConverter, + BroadcastConverter, MultiBroadcastConverter, TransposeConverter, + ReshapeConverter, SliceConverter, ReduceMeanConverter, + ReduceSumConverter, TrivialConverter, + TrivialConverter, + TrivialConverter, DivConverter, MulConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, + TrivialConverter, QuantizeLinearConverter, + DeQuantizeLinearConverter, ConvertConverter, NegConverter, + ReluConverter, SoftmaxConverter, LiteralConverter, ClipConverter, + WhereConverter>(typeConverter, patterns.getContext()); } void mlir::migraphx::populateMIGraphXFuncBoundaryToTosaConversionPatterns( diff --git a/mlir/lib/Conversion/RocmlirCustomTosaToLinalg/RocmlirCustomTosaToLinalg.cpp b/mlir/lib/Conversion/RocmlirCustomTosaToLinalg/RocmlirCustomTosaToLinalg.cpp index 6ed91afe28d9..112c08cc6946 100644 --- a/mlir/lib/Conversion/RocmlirCustomTosaToLinalg/RocmlirCustomTosaToLinalg.cpp +++ b/mlir/lib/Conversion/RocmlirCustomTosaToLinalg/RocmlirCustomTosaToLinalg.cpp @@ -49,35 +49,61 @@ LogicalResult UnsignedCastLoweringPattern::matchAndRewrite( ConversionPatternRewriter &rewriter) const { if (op.getDomainName() != "rocmlir") return rewriter.notifyMatchFailure(op, "domain isn't rocmlir"); - if (op.getOperatorName() != "unsigned_cast") - return rewriter.notifyMatchFailure(op, "isn't an unsigned_cast"); + if (op.getOperatorName() != "unsigned_cast" && + op.getOperatorName() != "unsigned_div") + return rewriter.notifyMatchFailure( + op, "isn't an unsigned_cast or unsigned_div"); + Location loc = op.getLoc(); auto outType = cast(op.getResults().front().getType()); + Type inElemType = + cast(op.getInputs().front().getType()).getElementType(); Type outElemType = outType.getElementType(); Value emptyTensor = rewriter.create( loc, outType, /*dynamic_sizes=*/ValueRange{}); SmallVector iterationMaps( - 2, rewriter.getMultiDimIdentityMap(outType.getRank())); + op.getInputs().size() + 1, + rewriter.getMultiDimIdentityMap(outType.getRank())); SmallVector iteratorKinds(outType.getRank(), utils::IteratorType::parallel); - auto cast = rewriter.create( + auto genericOp = rewriter.create( loc, outType, adaptor.getInputs(), emptyTensor, iterationMaps, iteratorKinds, [&](OpBuilder &b, Location loc, ValueRange inputs) { Value 
result; - if (isa(outElemType)) - result = b.create(loc, outElemType, inputs[0]); - else - result = b.create(loc, outElemType, inputs[0]); + if (op.getOperatorName() == "unsigned_cast") { + assert(inputs.size() == 2); + if (isa(inElemType)) { + if (isa(outElemType)) { + result = b.create(loc, outElemType, inputs[0]); + } else if (outElemType.getIntOrFloatBitWidth() > + inElemType.getIntOrFloatBitWidth()) { + result = b.create(loc, outElemType, inputs[0]); + } else { + result = b.create(loc, outElemType, inputs[0]); + } + } else { + assert(isa(inElemType)); + assert(isa(outElemType)); + result = b.create(loc, outElemType, inputs[0]); + } + } else if (op.getOperatorName() == "unsigned_div") { + assert(isa(outElemType)); + assert(isa(inElemType)); + assert(inputs.size() == 3); + result = + b.create(loc, outElemType, inputs[0], inputs[1]); + } b.create(loc, result); }); - rewriter.replaceOp(op, cast); + rewriter.replaceOp(op, genericOp); return success(); } void mlir::rock::populateRocmlirCustomTosaToLinalgTarget( ConversionTarget &target) { target.addLegalOp(); target.addDynamicallyLegalOp( [](tosa::CustomOp op) { return op.getDomainName() != "rocmlir"; }); diff --git a/mlir/lib/Dialect/MIGraphX/IR/MIGraphX.cpp b/mlir/lib/Dialect/MIGraphX/IR/MIGraphX.cpp index 6cf3b337f255..3e241923994c 100644 --- a/mlir/lib/Dialect/MIGraphX/IR/MIGraphX.cpp +++ b/mlir/lib/Dialect/MIGraphX/IR/MIGraphX.cpp @@ -246,7 +246,13 @@ RankedTensorType MIXRShapedType::asMemoryLayoutTensor() const { orderedShape[prevIdx] = stride / prevStride; } } - return RankedTensorType::get(orderedShape, getElementType()); + Type elementType = getElementType(); + if (elementType.isInteger() && !elementType.isSignlessInteger()) { + elementType = + IntegerType::get(getContext(), elementType.getIntOrFloatBitWidth(), + IntegerType::SignednessSemantics::Signless); + } + return RankedTensorType::get(orderedShape, elementType); } RankedTensorType MIXRShapedType::asFlatMemoryTensor() const { diff --git a/mlir/lib/Dialect/MIGraphX/Transforms/RealizeInt4.cpp b/mlir/lib/Dialect/MIGraphX/Transforms/RealizeInt4.cpp index 08d8885c8eca..13c299e49e0e 100644 --- a/mlir/lib/Dialect/MIGraphX/Transforms/RealizeInt4.cpp +++ b/mlir/lib/Dialect/MIGraphX/Transforms/RealizeInt4.cpp @@ -77,8 +77,15 @@ static MIXRShapedType asInt4Tensor(const MIXRShapedType byteType, llvm::enumerate(MutableArrayRef(strides))) if (static_cast(index) != axis) stride *= 2; - return MIXRShapedType::get(sizes, strides, - IntegerType::get(byteType.getContext(), 4)); + + auto signedness = IntegerType::SignednessSemantics::Signless; + if (byteType.getElementType().isUnsignedInteger()) + signedness = IntegerType::SignednessSemantics::Unsigned; + else if (byteType.getElementType().isSignedInteger()) + signedness = IntegerType::SignednessSemantics::Signed; + + return MIXRShapedType::get( + sizes, strides, IntegerType::get(byteType.getContext(), 4, signedness)); } LogicalResult RewriteByteUnpackPattern::matchAndRewrite( @@ -91,12 +98,9 @@ LogicalResult RewriteByteUnpackPattern::matchAndRewrite( int64_t axis = op.getAxis(); MIXRShapedType packedByteType = op.getIn().getType(); MIXRShapedType actualType = asInt4Tensor(packedByteType, axis); - Value reinterpreted = rewriter.create( - loc, actualType, adaptor.getIn(), axis, adaptor.getIsUnsigned()); - rewriter.replaceOpWithNewOp(op, outType, reinterpreted, - /*zeroExtend=*/adaptor.getIsUnsigned() - ? 
rewriter.getUnitAttr() - : nullptr); + Value reinterpreted = + rewriter.create(loc, actualType, adaptor.getIn(), axis); + rewriter.replaceOpWithNewOp(op, outType, reinterpreted); return success(); } @@ -113,8 +117,7 @@ LogicalResult TransposeUnpackInterchange::matchAndRewrite( MIXRShapedType preTrReinterpretedType = asInt4Tensor(trOp.getInput().getType(), preTransposeAxis); Value reinterpreted = rewriter.create( - op.getLoc(), preTrReinterpretedType, trOp.getInput(), preTransposeAxis, - adaptor.getIsUnsigned()); + op.getLoc(), preTrReinterpretedType, trOp.getInput(), preTransposeAxis); // Not a replaceOpWithNewOp() because we're keeping a different op's location. Value transposed = rewriter.create( trOp.getLoc(), op.getOut().getType(), reinterpreted, permutation); @@ -144,9 +147,8 @@ LogicalResult ReshapeUnpackInterchange::matchAndRewrite( lastUnitDim = idx; MIXRShapedType oldShapeInt4 = asInt4Tensor(oldShapeBytes, lastUnitDim); MIXRShapedType newShapeInt4 = op.getOut().getType(); - Value reinterpreted = - rewriter.create(op.getLoc(), oldShapeInt4, reshapeOp.getInput(), - lastUnitDim, adaptor.getIsUnsigned()); + Value reinterpreted = rewriter.create( + op.getLoc(), oldShapeInt4, reshapeOp.getInput(), lastUnitDim); Value reshaped = rewriter.create( reshapeOp.getLoc(), newShapeInt4, reinterpreted, rewriter.getI64ArrayAttr(newShapeInt4.getShape())); @@ -176,8 +178,7 @@ LogicalResult MultiBroadcastUnpackInterchange::matchAndRewrite( } } Value reinterpreted = rewriter.create( - op.getLoc(), preBroadcastInt4, broadcastOp.getInput(), adaptor.getAxis(), - adaptor.getIsUnsigned()); + op.getLoc(), preBroadcastInt4, broadcastOp.getInput(), adaptor.getAxis()); Value broadcasted = rewriter.create( broadcastOp.getLoc(), op.getOut().getType(), reinterpreted, rewriter.getArrayAttr(newOutLens)); @@ -195,7 +196,7 @@ LogicalResult FuncArgUnpackElimination::matchAndRewrite( dyn_cast(unpackArg.getParentRegion()->getParentOp()); if (!funcOp) return op.emitOpError("A tensor that'll be unpacked is an argument to " - "somethng other than a function"); + "something other than a function"); MIXRShapedType int4Type = op.getResult().getType(); FunctionType funcType = funcOp.getFunctionType(); SmallVector newInTypes(funcType.getInputs()); diff --git a/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa-signed-unsigned-ints.mlir b/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa-signed-unsigned-ints.mlir new file mode 100644 index 000000000000..27ee6a9da00d --- /dev/null +++ b/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa-signed-unsigned-ints.mlir @@ -0,0 +1,222 @@ +// RUN: rocmlir-opt -split-input-file --migraphx-to-tosa %s | FileCheck %s + +// CHECK-LABEL: @migraphx_literal_dense_ui8() +// CHECK-SAME: -> tensor<4xi8> { +func.func @migraphx_literal_dense_ui8() -> !migraphx.shaped<4xui8, 1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<[23, 28, 19, 20]> : tensor<4xi8>}> : () -> tensor<4xi8> + // CHECK-NEXT: return %[[const]] : tensor<4xi8> + %0 = migraphx.literal (dense<[23, 28, 19, 20]> : tensor<4xui8>) : <4xui8, 1> + return %0 : !migraphx.shaped<4xui8, 1> +} + +// CHECK-LABEL: @migraphx_literal_dense_si8() +// CHECK-SAME: -> tensor<4xi8> { +func.func @migraphx_literal_dense_si8() -> !migraphx.shaped<4xsi8, 1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<[-23, 28, -19, 20]> : tensor<4xi8>}> : () -> tensor<4xi8> + // CHECK-NEXT: return %[[const]] : tensor<4xi8> + %0 = migraphx.literal (dense<[-23, 28, -19, 20]> : tensor<4xsi8>) : <4xsi8, 1> + return %0 : !migraphx.shaped<4xsi8, 
1> +} + +// CHECK-LABEL: @migraphx_literal_dense_i8() +// CHECK-SAME: -> tensor<4xi8> { +func.func @migraphx_literal_dense_i8() -> !migraphx.shaped<4xi8, 1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<[-23, 28, -19, 20]> : tensor<4xi8>}> : () -> tensor<4xi8> + // CHECK-NEXT: return %[[const]] : tensor<4xi8> + %0 = migraphx.literal (dense<[-23, 28, -19, 20]> : tensor<4xi8>) : <4xi8, 1> + return %0 : !migraphx.shaped<4xi8, 1> +} + +// CHECK-LABEL: @migraphx_literal_dense_f16() +// CHECK-SAME: -> tensor<4xf16> { +func.func @migraphx_literal_dense_f16() -> !migraphx.shaped<4xf16, 1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<[-2.300000e+01, 2.800000e+01, -1.900000e+01, 2.000000e+01]> : tensor<4xf16>}> : () -> tensor<4xf16> + // CHECK-NEXT: return %[[const]] : tensor<4xf16> + %0 = migraphx.literal (dense<[-23.0, 28.0, -19.0, 20.0]> : tensor<4xf16>) : <4xf16, 1> + return %0 : !migraphx.shaped<4xf16, 1> +} + +// CHECK-LABEL: @migraphx_literal_dense_f32() +// CHECK-SAME: -> tensor<4xf32> { +func.func @migraphx_literal_dense_f32() -> !migraphx.shaped<4xf32, 1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<[-2.300000e+01, 2.800000e+01, -1.900000e+01, 2.000000e+01]> : tensor<4xf32>}> : () -> tensor<4xf32> + // CHECK-NEXT: return %[[const]] : tensor<4xf32> + %0 = migraphx.literal (dense<[-23.0, 28.0, -19.0, 20.0]> : tensor<4xf32>) : <4xf32, 1> + return %0 : !migraphx.shaped<4xf32, 1> +} + +// CHECK-LABEL: @migraphx_literal_zero() +// CHECK-SAME: -> tensor<9408xi8> { +func.func @migraphx_literal_zero() -> !migraphx.shaped<64x3x7x7xsi8, 147x49x7x1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<0> : tensor<64x3x7x7xi8>}> : () -> tensor<64x3x7x7xi8> + // CHECK-NEXT: %[[reshape:.+]] = tosa.reshape %[[const]] {new_shape = array} : (tensor<64x3x7x7xi8>) -> tensor<9408xi8> + // CHECK-NEXT: return %[[reshape]] : tensor<9408xi8> + %0 = migraphx.literal (dense<0> : tensor<64x1xsi8>) : <64x3x7x7xsi8, 147x49x7x1> + return %0 : !migraphx.shaped<64x3x7x7xsi8, 147x49x7x1> +} + +// CHECK-LABEL: @migraphx_literal_negative() +// CHECK-SAME: -> tensor<9408xi8> { +func.func @migraphx_literal_negative() -> !migraphx.shaped<64x3x7x7xsi8, 147x49x7x1> { + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<-1> : tensor<64x3x7x7xi8>}> : () -> tensor<64x3x7x7xi8> + // CHECK-NEXT: %[[reshape:.+]] = tosa.reshape %[[const]] {new_shape = array} : (tensor<64x3x7x7xi8>) -> tensor<9408xi8> + // CHECK-NEXT: return %[[reshape]] : tensor<9408xi8> + %0 = migraphx.literal (dense<-1> : tensor<64x1xsi8>) : <64x3x7x7xsi8, 147x49x7x1> + return %0 : !migraphx.shaped<64x3x7x7xsi8, 147x49x7x1> +} + +// CHECK-LABEL: @migraphx_convert_int4_signed +// CHECK: tosa.cast +// CHECK-SAME: (tensor<16xi4>) -> tensor<16xi8> +func.func @migraphx_convert_int4_signed(%arg0: !migraphx.shaped<16xsi4, 1>) -> !migraphx.shaped<16xsi8, 1> { + %0 = migraphx.convert %arg0 : <16xsi4, 1> to <16xsi8, 1> + return %0 : !migraphx.shaped<16xsi8, 1> +} + +// CHECK-LABEL: @migraphx_convert_int4_unsigned +// CHECK: tosa.custom +// CHECK-SAME: {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<16xi4>) -> tensor<16xi8> +func.func @migraphx_convert_int4_unsigned(%arg0: !migraphx.shaped<16xui4, 1>) -> !migraphx.shaped<16xui8, 1> { + %0 = migraphx.convert %arg0 : <16xui4, 1> to <16xui8, 1> + return %0 : !migraphx.shaped<16xui8, 1> +} + +// CHECK-LABEL: @migraphx_convert_int4_unsigned_reverse +// CHECK: tosa.custom +// CHECK-SAME: {domain_name = "rocmlir", 
implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<16xi8>) -> tensor<16xi4> +func.func @migraphx_convert_int4_unsigned_reverse(%arg0: !migraphx.shaped<16xui8, 1>) -> !migraphx.shaped<16xui4, 1> { + %0 = migraphx.convert %arg0 : <16xui8, 1> to <16xui4, 1> + return %0 : !migraphx.shaped<16xui4, 1> +} + +// CHECK-LABEL: @migraphx_convert_int4_unsigned_to_float +// CHECK: tosa.custom +// CHECK-SAME: {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<16xi4>) -> tensor<16xf32> +func.func @migraphx_convert_int4_unsigned_to_float(%arg0: !migraphx.shaped<16xui4, 1>) -> !migraphx.shaped<16xf32, 1> { + %0 = migraphx.convert %arg0 : <16xui4, 1> to <16xf32, 1> + return %0 : !migraphx.shaped<16xf32, 1> +} + +// CHECK-LABEL: @migraphx_convert_int4_float_to_unsigned +// CHECK: tosa.custom +// CHECK-SAME: {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<16xf32>) -> tensor<16xi4> +func.func @migraphx_convert_int4_float_to_unsigned(%arg0: !migraphx.shaped<16xf32, 1>) -> !migraphx.shaped<16xui4, 1> { + %0 = migraphx.convert %arg0 : <16xf32, 1> to <16xui4, 1> + return %0 : !migraphx.shaped<16xui4, 1> +} + +// CHECK-LABEL: @migraphx_div_si32 +// CHECK: tosa.int_div +// CHECK-SAME: (tensor<1x36x384x64xi32>, tensor<1x36x384x64xi32>) -> tensor<1x36x384x64xi32> +func.func @migraphx_div_si32(%arg0: !migraphx.shaped<1x36x384x64xsi32, 884736x24576x64x1>, %arg1: !migraphx.shaped<1x36x384x64xsi32, 884736x24576x64x1>) -> !migraphx.shaped<1x36x384x64xsi32, 884736x24576x64x1> attributes{kernel, arch = ""} { + %0 = migraphx.div %arg0, %arg1 : <1x36x384x64xsi32, 884736x24576x64x1>, <1x36x384x64xsi32, 884736x24576x64x1> -> <1x36x384x64xsi32, 884736x24576x64x1> + return %0 : !migraphx.shaped<1x36x384x64xsi32, 884736x24576x64x1> +} + +// CHECK-LABEL: @migraphx_div_ui32 +// CHECK: tosa.custom +// CHECK-SAME: {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_div"} : (tensor<1x36x384x64xi32>, tensor<1x36x384x64xi32>) -> tensor<1x36x384x64xi32> +func.func @migraphx_div_ui32(%arg0: !migraphx.shaped<1x36x384x64xui32, 884736x24576x64x1>, %arg1: !migraphx.shaped<1x36x384x64xui32, 884736x24576x64x1>) -> !migraphx.shaped<1x36x384x64xui32, 884736x24576x64x1> attributes{kernel, arch = ""} { + %0 = migraphx.div %arg0, %arg1 : <1x36x384x64xui32, 884736x24576x64x1>, <1x36x384x64xui32, 884736x24576x64x1> -> <1x36x384x64xui32, 884736x24576x64x1> + return %0 : !migraphx.shaped<1x36x384x64xui32, 884736x24576x64x1> +} + +// CHECK-LABEL: func @dequantize_scale_bias_ui32 +// CHECK: tosa.custom %{{.*}} {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<1x112x112x64xi32>) -> tensor<1x112x112x64xf32> +// CHECK: tosa.custom %{{.*}} {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<64xi32>) -> tensor<64xf32> +// CHECK: tosa.sub +// CHECK: tosa.mul +func.func @dequantize_scale_bias_ui32(%arg: !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1>, %scale: !migraphx.shaped<64xf32, 1>, %bias: !migraphx.shaped<64xui32, 1>) -> !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.dequantizelinear %arg, %scale, %bias : <1x112x112x64xui32, 802816x7168x64x1>, <64xf32, 1>, !migraphx.shaped<64xui32, 1> -> <1x112x112x64xf32, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1> +} + +// CHECK-LABEL: func @dequantize_scale_bias_si32 +// CHECK: tosa.cast{{.*}}f32 
+// CHECK: tosa.cast{{.*}}f32 +// CHECK: tosa.sub +// CHECK: tosa.mul +func.func @dequantize_scale_bias_si32(%arg: !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1>, %scale: !migraphx.shaped<64xf32, 1>, %bias: !migraphx.shaped<64xsi32, 1>) -> !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.dequantizelinear %arg, %scale, %bias : <1x112x112x64xsi32, 802816x7168x64x1>, <64xf32, 1>, !migraphx.shaped<64xsi32, 1> -> <1x112x112x64xf32, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1> +} + +// CHECK-LABEL: func @quantize_scale_bias_ui32 +// CHECK: tosa.reciprocal +// CHECK: tosa.mul +// CHECK: tosa.cast{{.*}}: (tensor<1x112x112x64xf32>) -> tensor<1x112x112x64xi32> +// CHECK: tosa.add +func.func @quantize_scale_bias_ui32(%arg: !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1>, %scale: !migraphx.shaped<64xf32, 1>, %bias: !migraphx.shaped<64xui32, 1>) -> !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.quantizelinear %arg, %scale, %bias : <1x112x112x64xf32, 802816x7168x64x1>, <64xf32, 1>, !migraphx.shaped<64xui32, 1> -> <1x112x112x64xui32, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1> +} + +// CHECK-LABEL: func @quantize_scale_bias_si32 +// CHECK: tosa.reciprocal +// CHECK: tosa.mul +// CHECK: tosa.cast{{.*}}f32{{.*}}i32 +// CHECK: tosa.add +func.func @quantize_scale_bias_si32(%arg: !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1>, %scale: !migraphx.shaped<64xf32, 1>, %bias: !migraphx.shaped<64xsi32, 1>) -> !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.quantizelinear %arg, %scale, %bias : <1x112x112x64xf32, 802816x7168x64x1>, <64xf32, 1>, !migraphx.shaped<64xsi32, 1> -> <1x112x112x64xsi32, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1> +} + +// CHECK-LABEL: func @quantize_scale_bias_ui8 +// CHECK: tosa.reciprocal +// CHECK: tosa.mul +// CHECK: tosa.cast{{.*}}: (tensor<1x112x112x64xf32>) -> tensor<1x112x112x64xi32> +// CHECK: tosa.custom{{.*}}i8{{.*}}i32 +// CHECK: tosa.add +// CHECK: tosa.clamp{{.*}}i32{{.*}}i32 +// CHECK: tosa.custom{{.*}}i32{{.*}}i8 +func.func @quantize_scale_bias_ui8(%arg: !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1>, %scale: !migraphx.shaped<64xf32, 1>, %bias: !migraphx.shaped<64xui8, 1>) -> !migraphx.shaped<1x112x112x64xui8, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.quantizelinear %arg, %scale, %bias : <1x112x112x64xf32, 802816x7168x64x1>, <64xf32, 1>, !migraphx.shaped<64xui8, 1> -> <1x112x112x64xui8, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xui8, 802816x7168x64x1> +} + +// CHECK-LABEL: func @quantize_scale_bias_si8 +// CHECK: tosa.reciprocal +// CHECK: tosa.mul +// CHECK: tosa.cast{{.*}}f32{{.*}}i32 +// CHECK: tosa.cast{{.*}}i8{{.*}}i32 +// CHECK: tosa.add +// CHECK: tosa.clamp{{.*}}i32{{.*}}i32 +// CHECK: tosa.cast{{.*}}i32{{.*}}i8 +func.func @quantize_scale_bias_si8(%arg: !migraphx.shaped<1x112x112x64xf32, 802816x7168x64x1>, %scale: !migraphx.shaped<64xf32, 1>, %bias: !migraphx.shaped<64xsi8, 1>) -> !migraphx.shaped<1x112x112x64xsi8, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.quantizelinear %arg, %scale, %bias : <1x112x112x64xf32, 802816x7168x64x1>, <64xf32, 1>, !migraphx.shaped<64xsi8, 1> -> <1x112x112x64xsi8, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xsi8, 802816x7168x64x1> +} + +// CHECK-LABEL: func 
@basic_add_ui32 +// CHECK: tosa.add{{.*}}(tensor<1x112x112x64xi32>, tensor<1x112x112x64xi32>) -> tensor<1x112x112x64xi32> +func.func @basic_add_ui32(%arg0: !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1>, %arg1: !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1>) -> !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.add %arg0, %arg1 : <1x112x112x64xui32, 802816x7168x64x1>, <1x112x112x64xui32, 802816x7168x64x1> -> <1x112x112x64xui32, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xui32, 802816x7168x64x1> +} + +// CHECK-LABEL: func @basic_add_si32 +// CHECK: tosa.add{{.*}}(tensor<1x112x112x64xi32>, tensor<1x112x112x64xi32>) -> tensor<1x112x112x64xi32> +func.func @basic_add_si32(%arg0: !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1>, %arg1: !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1>) -> !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1> attributes {kernel = "mixr"} { + %1 = migraphx.add %arg0, %arg1 : <1x112x112x64xsi32, 802816x7168x64x1>, <1x112x112x64xsi32, 802816x7168x64x1> -> <1x112x112x64xsi32, 802816x7168x64x1> + return %1 : !migraphx.shaped<1x112x112x64xsi32, 802816x7168x64x1> +} + +// CHECK-LABEL: func @conv_with_quant_si8 +// CHECK: tosa.conv2d{{.*}}quantization_info{{.*}}(tensor<1x224x224x3xi8>, tensor<64x7x7x3xi8>, tensor<64xi32>) -> tensor<1x112x112x64xi32> +// CHECK: tosa.cast{{.*}}(tensor<1x64x112x112xi32>) -> tensor<1x64x112x112xf32> +// CHECK: tosa.cast{{.*}}(tensor<1x64x1x1xi32>) -> tensor<1x64x1x1xf32> +// CHECK: tosa.sub{{.*}}(tensor<1x64x112x112xf32>, tensor<1x64x1x1xf32>) -> tensor<1x64x112x112xf32> +// CHECK: tosa.mul{{.*}}(tensor<1x64x112x112xf32>, tensor<1x64x1x1xf32>) -> tensor<1x64x112x112xf32> +// CHECK: tosa.reciprocal{{.*}}(tensor<1x64x1x1xf32>) -> tensor<1x64x1x1xf32> +// CHECK: tosa.mul{{.*}}(tensor<1x64x112x112xf32>, tensor<1x64x1x1xf32>) -> tensor<1x64x112x112xf32> +// CHECK: tosa.cast{{.*}}(tensor<1x64x112x112xf32>) -> tensor<1x64x112x112xi32> +// CHECK: tosa.cast{{.*}}(tensor<1x64x1x1xi8>) -> tensor<1x64x1x1xi32> +// CHECK: tosa.add{{.*}}(tensor<1x64x112x112xi32>, tensor<1x64x1x1xi32>) -> tensor<1x64x112x112xi32> +// CHECK: tosa.clamp{{.*}}(tensor<1x64x112x112xi32>) -> tensor<1x64x112x112xi32> +// CHECK: tosa.cast{{.*}}(tensor<1x64x112x112xi32>) -> tensor<1x64x112x112xi8> +func.func @conv_with_quant_si8(%arg1: !migraphx.shaped<1x3x224x224xsi8, 150528x50176x224x1>, %arg2: !migraphx.shaped<64x3x7x7xsi8, 147x49x7x1>, %scale: !migraphx.shaped<1x64x1x1xf32, 64x1x1x1>, %bias: !migraphx.shaped<1x64x1x1xsi32, 64x1x1x1>, %bias2: !migraphx.shaped<1x64x1x1xsi8, 64x1x1x1>) -> !migraphx.shaped<1x64x112x112xsi8, 802816x12544x112x1> attributes {kernel = "mixr"} { + %1 = migraphx.quant_convolution %arg1, %arg2 {dilation = [1, 1], group = 1 : i64, padding = [3, 3, 3, 3], padding_mode = 0 : i64, stride = [2, 2]} : <1x3x224x224xsi8, 150528x50176x224x1>, <64x3x7x7xsi8, 147x49x7x1> -> <1x64x112x112xsi32, 802816x12544x112x1> + %2 = migraphx.dequantizelinear %1, %scale, %bias : <1x64x112x112xsi32, 802816x12544x112x1>, <1x64x1x1xf32, 64x1x1x1>, !migraphx.shaped<1x64x1x1xsi32, 64x1x1x1> -> <1x64x112x112xf32, 802816x12544x112x1> + %3 = migraphx.quantizelinear %2, %scale, %bias2 : <1x64x112x112xf32, 802816x12544x112x1>, <1x64x1x1xf32, 64x1x1x1>, !migraphx.shaped<1x64x1x1xsi8, 64x1x1x1> -> <1x64x112x112xsi8, 802816x12544x112x1> + return %3 : !migraphx.shaped<1x64x112x112xsi8, 802816x12544x112x1> +} diff --git a/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa.mlir 
b/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa.mlir index f9526ecbd500..e671fd336c52 100644 --- a/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa.mlir +++ b/mlir/test/Conversion/MIGraphXToTosa/migraphx-to-tosa.mlir @@ -167,7 +167,7 @@ func.func @scalar0d(%arg0: !migraphx.shaped) -> !migraphx.shaped { // CHECK-LABEL: @conv3d_add // CHECK-SAME: (%{{.*}}: tensor<4xf32>, %{{.*}}: tensor<750xf32>, %{{.*}}: tensor<96xf32>) -> tensor<64xf32> -func.func @conv3d_add(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 0x1x0x0x0>, %arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> { +func.func @conv3d_add(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 0x1x0x0x0>, %arg1: !migraphx.shaped<2x3x5x5x5xf32, 375x125x25x5x1>, %arg2: !migraphx.shaped<4x3x2x2x2xf32, 24x8x4x2x1>) -> !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> { // CHECK-COUNT-3: tosa.transpose // CHECK: tosa.conv3d // CHECK-SAME: (tensor<2x5x5x5x3xf32>, tensor<4x2x2x2x3xf32>, tensor<4xf32>) -> tensor<2x2x2x2x4xf32> @@ -176,3 +176,16 @@ func.func @conv3d_add(%arg0: !migraphx.shaped<2x4x2x2x2xf32, 0x1x0x0x0>, %arg1: %1 = migraphx.add %0, %arg0 : <2x4x2x2x2xf32, 32x8x4x2x1>, <2x4x2x2x2xf32, 0x1x0x0x0> -> <2x4x2x2x2xf32, 32x8x4x2x1> return %1 : !migraphx.shaped<2x4x2x2x2xf32, 32x8x4x2x1> } + +// CHECK-LABEL: @conv1d_add +// CHECK-SAME: (%{{.*}}: tensor<64xf32>, %{{.*}}: tensor<672xf32>, %{{.*}}: tensor<1344xf32>) -> tensor<14336xf32> +func.func @conv1d_add(%arg0: !migraphx.shaped<1x64x224xf32, 0x1x0>, %arg1: !migraphx.shaped<1x3x224xf32, 672x224x1>, %arg2: !migraphx.shaped<64x3x7xf32, 21x7x1>) -> !migraphx.shaped<1x64x224xf32, 14336x224x1> { + // CHECK-COUNT-3: tosa.transpose + // CHECK: tosa.conv2d + // CHECK-SAME: {dilation = array<i64: 1, 1>, group = 1 : i64, pad = array<i64: 3, 3, 0, 0>, stride = array<i64: 1, 1>} : (tensor<1x224x1x3xf32>, tensor<64x7x1x3xf32>, tensor<64xf32>) -> tensor<1x224x1x64xf32> + // CHECK: tosa.transpose + // CHECK: tosa.add + %0 = migraphx.convolution %arg1, %arg2 {dilation = [1], group = 1 : i64, padding = [3, 3], padding_mode = 0 : i64, stride = [1]} : <1x3x224xf32, 672x224x1>, <64x3x7xf32, 21x7x1> -> <1x64x224xf32, 14336x224x1> + %1 = migraphx.add %0, %arg0 : <1x64x224xf32, 14336x224x1>, <1x64x224xf32, 0x1x0> -> <1x64x224xf32, 14336x224x1> + return %1 : !migraphx.shaped<1x64x224xf32, 14336x224x1> +} diff --git a/mlir/test/Conversion/MIGraphXToTosa/mixr-to-tosa-ops.mlir b/mlir/test/Conversion/MIGraphXToTosa/mixr-to-tosa-ops.mlir index 28eebb812525..62cd1d8e5e74 100644 --- a/mlir/test/Conversion/MIGraphXToTosa/mixr-to-tosa-ops.mlir +++ b/mlir/test/Conversion/MIGraphXToTosa/mixr-to-tosa-ops.mlir @@ -1,6 +1,15 @@ // RUN: rocmlir-opt -split-input-file --migraphx-transform --canonicalize --migraphx-to-tosa %s -verify-diagnostics -o -| FileCheck %s module { + // CHECK-LABEL: func @literal_zero + // CHECK: %[[const:.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<64x3x7x7xf16>}> : () -> tensor<64x3x7x7xf16> + // CHECK-NEXT: %[[reshape:.+]] = tosa.reshape %[[const]] {new_shape = array<i64: 9408>} : (tensor<64x3x7x7xf16>) -> tensor<9408xf16> + // CHECK-NEXT: return %[[reshape]] : tensor<9408xf16> + func.func @literal_zero() -> !migraphx.shaped<64x3x7x7xf16, 147x49x7x1> { + %0 = migraphx.literal (dense<0.0> : tensor<64x1xf16>) : <64x3x7x7xf16, 147x49x7x1> + return %0 : !migraphx.shaped<64x3x7x7xf16, 147x49x7x1> + } + // CHECK-LABEL: func @dequantize_scale // CHECK-NOT: tosa.sub // CHECK: tosa.cast @@ -108,8 +117,8 @@ module { // CHECK-LABEL: func @quantize_scale_bias
// CHECK: tosa.reciprocal // CHECK: tosa.mul - // CHECK: tosa.cast{{.*}}i8{{.*}}i32 // CHECK: tosa.cast{{.*}}f32{{.*}}i32 + // CHECK: tosa.cast{{.*}}i8{{.*}}i32 // CHECK: tosa.add // CHECK: tosa.clamp // CHECK-SAME: max_int = 127 @@ -123,8 +132,8 @@ module { // CHECK-LABEL: func @quantize_scale_bias_fp8 // CHECK: tosa.reciprocal // CHECK: tosa.mul - // CHECK: tosa.cast{{.*}}f8E4M3FNUZ{{.*}}f32 // CHECK: tosa.cast{{.*}}f32{{.*}}f32 + // CHECK: tosa.cast{{.*}}f8E4M3FNUZ{{.*}}f32 // CHECK: tosa.add // CHECK: tosa.clamp // CHECK-SAME: max_fp = 2.400000e+02 @@ -138,8 +147,8 @@ module { // CHECK-LABEL: func @quantize_scale_bias_fp8_ocp // CHECK: tosa.reciprocal // CHECK: tosa.mul - // CHECK: tosa.cast{{.*}}f8E4M3FN{{.*}}f32 // CHECK: tosa.cast{{.*}}f32{{.*}}f32 + // CHECK: tosa.cast{{.*}}f8E4M3FN{{.*}}f32 // CHECK: tosa.add // CHECK: tosa.clamp // CHECK-SAME: max_fp = 4.480000e+02 @@ -153,8 +162,8 @@ module { // CHECK-LABEL: func @quantize_scale_bias_f16 // CHECK: tosa.reciprocal // CHECK: tosa.mul - // CHECK: tosa.cast{{.*}}i8{{.*}}i32 // CHECK: tosa.cast{{.*}}f16{{.*}}i32 + // CHECK: tosa.cast{{.*}}i8{{.*}}i32 // CHECK: tosa.add // CHECK: tosa.clamp // CHECK: tosa.cast @@ -238,6 +247,24 @@ module { return %0 : !migraphx.shaped<32x64xf32, 64x1> } + // CHECK-LABEL: func.func @matmul_broadcast_op + func.func @matmul_broadcast_op(%arg0: !migraphx.shaped<64x64x2304xf16, 147456x2304x1>, %arg1: !migraphx.shaped<64x64x768xf16, 49152x768x1>, %arg2: !migraphx.shaped<1x768x2304xf16, 1769472x2304x1>) -> !migraphx.shaped<64x64x2304xf16, 147456x2304x1> attributes {arch = "gfx90a:sramecc+:xnack-", kernel = "mixr"} { + // CHECK-DAG: %[[ARG2:.*]] = tosa.reshape %arg2 {new_shape = array} + // CHECK-DAG: %[[ARG1:.*]] = tosa.reshape %arg1 {new_shape = array} + // CHECK-DAG: %[[ARG0:.*]] = tosa.reshape %arg0 {new_shape = array} + // CHECK-DAG: %[[INPUT:.*]] = tosa.reshape %[[ARG2]] {new_shape = array} + %0 = migraphx.broadcast %arg2 {axis = 0, out_lens = [64, 768, 2304]} : <1x768x2304xf16, 1769472x2304x1> -> <64x768x2304xf16, 0x2304x1> + // CHECK-DAG: %[[CST0:.*]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<64x768x2304xf16>}> : () -> tensor<64x768x2304xf16> + // CHECK-DAG: %[[ADD:.*]] = tosa.add %[[CST0]], %[[INPUT]] + %1 = migraphx.dot %arg1, %0 : <64x64x768xf16, 49152x768x1>, <64x768x2304xf16, 0x2304x1> -> <64x64x2304xf16, 147456x2304x1> + // CHECK-DAG: %[[MATMUL:.*]] = tosa.matmul %[[ARG1]], %[[ADD]] + // CHECK-DAG: %[[BIASED:.*]] = tosa.add %[[MATMUL]], %[[ARG0]] + // CHECK-DAG: %[[RET:.*]] = tosa.reshape %[[BIASED]] {new_shape = array} + // CHECK: return %[[RET]] + %2 = migraphx.add %1, %arg0 : <64x64x2304xf16, 147456x2304x1>, <64x64x2304xf16, 147456x2304x1> -> <64x64x2304xf16, 147456x2304x1> + return %2 : !migraphx.shaped<64x64x2304xf16, 147456x2304x1> + } + // CHECK-LABEL: func.func @matmul_broadcast func.func @matmul_broadcast(%arg0: !migraphx.shaped<64x64x2304xf16, 147456x2304x1>, %arg1: !migraphx.shaped<64x64x768xf16, 49152x768x1>, %arg2: !migraphx.shaped<1x768x2304xf16, 1769472x2304x1>) -> !migraphx.shaped<64x64x2304xf16, 147456x2304x1> attributes {arch = "gfx90a:sramecc+:xnack-", kernel = "mixr"} { // CHECK-DAG: %[[ARG2:.*]] = tosa.reshape %arg2 {new_shape = array} @@ -541,14 +568,6 @@ module { return %0 : !migraphx.shaped<16xf32, 1> } - // CHECK-LABEL: func.func @func_convert - // CHECK: tosa.custom - // CHECK-SAME: {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<16xi4>) -> tensor<16xi8> - func.func @func_convert_int4_unsigned(%arg0: 
!migraphx.shaped<16xi4, 1>) -> !migraphx.shaped<16xi8, 1> { - %0 = migraphx.convert zero_extend %arg0 : <16xi4, 1> to <16xi8, 1> - return %0 : !migraphx.shaped<16xi8, 1> - } - // CHECK-LABEL: func.func @func_div_f32 // CHECK: tosa.reciprocal // CHECK: tosa.mul diff --git a/mlir/test/Conversion/RocmlirCustomTosaToLinalg/rocmlir-custom-tosa-to-linalg.mlir b/mlir/test/Conversion/RocmlirCustomTosaToLinalg/rocmlir-custom-tosa-to-linalg.mlir index a63aa7e6624f..55dd7239a624 100644 --- a/mlir/test/Conversion/RocmlirCustomTosaToLinalg/rocmlir-custom-tosa-to-linalg.mlir +++ b/mlir/test/Conversion/RocmlirCustomTosaToLinalg/rocmlir-custom-tosa-to-linalg.mlir @@ -1,6 +1,6 @@ // RUN: rocmlir-opt --rocmlir-custom-tosa-to-linalg --split-input-file %s | FileCheck %s -// CHECK-LABEL: @integers +// CHECK-LABEL: @integers_i4_to_i8 // CHECK-SAME: (%[[arg0:.+]]: tensor<8x8x2xi4>) // CHECK: %[[empty:.+]] = tensor.empty() : tensor<8x8x2xi8> // CHECK: %[[ret:.+]] = linalg.generic @@ -11,17 +11,91 @@ // CHECK-NEXT: linalg.yield %[[res]] // CHECK-NEXT: -> tensor<8x8x2xi8> // CHECK-NEXT: return %[[ret]] -func.func @integers(%arg0: tensor<8x8x2xi4>) -> tensor<8x8x2xi8> { +func.func @integers_i4_to_i8(%arg0: tensor<8x8x2xi4>) -> tensor<8x8x2xi8> { %out = tosa.custom %arg0 {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<8x8x2xi4>) -> tensor<8x8x2xi8> func.return %out : tensor<8x8x2xi8> } +// CHECK-LABEL: @integers_i8_to_i4 +// CHECK-SAME: (%[[arg0:.+]]: tensor<8x8x2xi8>) +// CHECK: %[[empty:.+]] = tensor.empty() : tensor<8x8x2xi4> +// CHECK: %[[ret:.+]] = linalg.generic +// CHECK-SAME: ins(%[[arg0]] : tensor<8x8x2xi8>) +// CHECK-SAME: outs(%[[empty]] : tensor<8x8x2xi4>) +// CHECK-NEXT: %[[in:.+]]: i8 +// CHECK-NEXT: %[[res:.+]] = arith.trunci %[[in]] : i8 to i4 +// CHECK-NEXT: linalg.yield %[[res]] +// CHECK-NEXT: -> tensor<8x8x2xi4> +// CHECK-NEXT: return %[[ret]] +func.func @integers_i8_to_i4(%arg0: tensor<8x8x2xi8>) -> tensor<8x8x2xi4> { + %out = tosa.custom %arg0 {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<8x8x2xi8>) -> tensor<8x8x2xi4> + func.return %out : tensor<8x8x2xi4> +} + // ----- -// CHECK-LABEL: @floats -// CHECK: linalg.generic -// CHECK: arith.uitofp -func.func @floats(%arg0: tensor<8x8x2xi4>) -> tensor<8x8x2xf16> { +// CHECK-LABEL: @floats_i4_to_f16 +// CHECK-SAME: (%[[arg0:.+]]: tensor<8x8x2xi4>) +// CHECK: %[[empty:.+]] = tensor.empty() : tensor<8x8x2xf16> +// CHECK: %[[ret:.+]] = linalg.generic +// CHECK-SAME: ins(%[[arg0]] : tensor<8x8x2xi4>) +// CHECK-SAME: outs(%[[empty]] : tensor<8x8x2xf16>) +// CHECK-NEXT: %[[in:.+]]: i4 +// CHECK-NEXT: %[[res:.+]] = arith.uitofp %[[in]] : i4 to f16 +// CHECK-NEXT: linalg.yield %[[res]] +// CHECK-NEXT: -> tensor<8x8x2xf16> +// CHECK-NEXT: return %[[ret]] +func.func @floats_i4_to_f16(%arg0: tensor<8x8x2xi4>) -> tensor<8x8x2xf16> { %out = tosa.custom %arg0 {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<8x8x2xi4>) -> tensor<8x8x2xf16> func.return %out : tensor<8x8x2xf16> } + +// CHECK-LABEL: @floats_i4_to_f32 +// CHECK-SAME: (%[[arg0:.+]]: tensor<8x8x2xi4>) +// CHECK: %[[empty:.+]] = tensor.empty() : tensor<8x8x2xf32> +// CHECK: %[[ret:.+]] = linalg.generic +// CHECK-SAME: ins(%[[arg0]] : tensor<8x8x2xi4>) +// CHECK-SAME: outs(%[[empty]] : tensor<8x8x2xf32>) +// CHECK-NEXT: %[[in:.+]]: i4 +// CHECK-NEXT: %[[res:.+]] = arith.uitofp %[[in]] : i4 to f32 +// CHECK-NEXT: linalg.yield %[[res]] +// CHECK-NEXT: -> 
tensor<8x8x2xf32> +// CHECK-NEXT: return %[[ret]] +func.func @floats_i4_to_f32(%arg0: tensor<8x8x2xi4>) -> tensor<8x8x2xf32> { + %out = tosa.custom %arg0 {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<8x8x2xi4>) -> tensor<8x8x2xf32> + func.return %out : tensor<8x8x2xf32> +} + +// CHECK-LABEL: @floats_f16_to_i8 +// CHECK-SAME: (%[[arg0:.+]]: tensor<8x8x2xf16>) +// CHECK: %[[empty:.+]] = tensor.empty() : tensor<8x8x2xi8> +// CHECK: %[[ret:.+]] = linalg.generic +// CHECK-SAME: ins(%[[arg0]] : tensor<8x8x2xf16>) +// CHECK-SAME: outs(%[[empty]] : tensor<8x8x2xi8>) +// CHECK-NEXT: %[[in:.+]]: f16 +// CHECK-NEXT: %[[res:.+]] = arith.fptoui %[[in]] : f16 to i8 +// CHECK-NEXT: linalg.yield %[[res]] +// CHECK-NEXT: -> tensor<8x8x2xi8> +// CHECK-NEXT: return %[[ret]] +func.func @floats_f16_to_i8(%arg0: tensor<8x8x2xf16>) -> tensor<8x8x2xi8> { + %out = tosa.custom %arg0 {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_cast"} : (tensor<8x8x2xf16>) -> tensor<8x8x2xi8> + func.return %out : tensor<8x8x2xi8> +} + +// ----- + +// CHECK-LABEL: @unsigned_div +// CHECK-SAME: (%[[arg0:.+]]: tensor<1x36x384x64xi32>, %[[arg1:.+]]: tensor<1x36x384x64xi32>) +// CHECK: %[[empty:.+]] = tensor.empty() : tensor<1x36x384x64xi32> +// CHECK: %[[ret:.+]] = linalg.generic +// CHECK-SAME: ins(%[[arg0]], %[[arg1]] : tensor<1x36x384x64xi32>, tensor<1x36x384x64xi32>) +// CHECK-SAME: outs(%[[empty]] : tensor<1x36x384x64xi32>) +// CHECK-NEXT: %[[in:.+]]: i32, %[[in1:.+]]: i32, %[[out:.+]]: i32 +// CHECK-NEXT: %[[res:.+]] = arith.divui %[[in]], %[[in1]] : i32 +// CHECK-NEXT: linalg.yield %[[res]] +// CHECK-NEXT: -> tensor<1x36x384x64xi32> +// CHECK-NEXT: return %[[ret]] +func.func @unsigned_div(%arg0: tensor<1x36x384x64xi32>, %arg1: tensor<1x36x384x64xi32>) -> tensor<1x36x384x64xi32> { + %out = tosa.custom %arg0, %arg1 {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_div"} : (tensor<1x36x384x64xi32>, tensor<1x36x384x64xi32>) -> tensor<1x36x384x64xi32> + func.return %out : tensor<1x36x384x64xi32> +} diff --git a/mlir/test/Conversion/TosaToRock/tosa-to-rock.mlir b/mlir/test/Conversion/TosaToRock/tosa-to-rock.mlir index 1f0e37ec623b..7829cd43ad91 100644 --- a/mlir/test/Conversion/TosaToRock/tosa-to-rock.mlir +++ b/mlir/test/Conversion/TosaToRock/tosa-to-rock.mlir @@ -26,6 +26,33 @@ func.func private @mlir_conv3d(%arg0: tensor<4x1x1x1x1xf32>, %arg1: tensor<2x5x5 return %8 : tensor<2x2x2x2x4xf32> } +// CHECK-LABEL: mlir_conv1d +// CHECK: %[[convRes:.*]] = rock.conv(%{{.*}}, %{{.*}}, %{{.*}}) features = none {arch = "", dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "y", "x", "c"], input_layout = ["ni", "hi", "wi", "gi", "ci"], output_layout = ["no", "ho", "wo", "go", "ko"], padding = [3 : index, 3 : index, 0 : index, 0 : index], strides = [1 : index, 1 : index]} : tensor<1x64x7x1x3xf32>, tensor<1x224x1x1x3xf32>, tensor<1x224x1x1x64xf32> -> tensor<1x224x1x1x64xf32> +// CHECK-NEXT: %[[castRes:.*]] = rock.tensor_untransform_cast %[[convRes]] aka %{{.*}} : tensor<1x224x1x1x64xf32> to tensor<1x224x1x64xf32> +// CHECK-NEXT: %[[reshapeRes:.*]] = tosa.reshape %[[castRes]] {new_shape = array} : (tensor<1x224x1x64xf32>) -> tensor<1x224x64xf32> + +func.func private @mlir_conv1d(%arg0: tensor<64xf32>, %arg1: tensor<672xf32>, %arg2: tensor<1344xf32>) -> tensor<14336xf32> attributes {kernel, arch = ""} { + %0 = tosa.reshape %arg0 {new_shape = array} : (tensor<64xf32>) -> tensor<64x1x1xf32> + %1 = "tosa.const"() <{value = 
dense<[2, 0, 1]> : tensor<3xi32>}> : () -> tensor<3xi32> + %2 = tosa.transpose %0, %1 : (tensor<64x1x1xf32>, tensor<3xi32>) -> tensor<1x64x1xf32> + %3 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1x64x224xf32>}> : () -> tensor<1x64x224xf32> + %4 = tosa.add %3, %2 : (tensor<1x64x224xf32>, tensor<1x64x1xf32>) -> tensor<1x64x224xf32> + %5 = tosa.reshape %arg2 {new_shape = array<i64: 64, 3, 7>} : (tensor<1344xf32>) -> tensor<64x3x7xf32> + %6 = tosa.reshape %arg1 {new_shape = array<i64: 1, 3, 224>} : (tensor<672xf32>) -> tensor<1x3x224xf32> + %7 = "tosa.const"() <{value = dense<[0, 2, 1]> : tensor<3xi32>}> : () -> tensor<3xi32> + %8 = tosa.transpose %6, %7 : (tensor<1x3x224xf32>, tensor<3xi32>) -> tensor<1x224x3xf32> + %9 = tosa.transpose %5, %7 : (tensor<64x3x7xf32>, tensor<3xi32>) -> tensor<64x7x3xf32> + %10 = tosa.reshape %8 {new_shape = array<i64: 1, 224, 1, 3>} : (tensor<1x224x3xf32>) -> tensor<1x224x1x3xf32> + %11 = tosa.reshape %9 {new_shape = array<i64: 64, 7, 1, 3>} : (tensor<64x7x3xf32>) -> tensor<64x7x1x3xf32> + %12 = "tosa.const"() <{value = dense<0.000000e+00> : tensor<64xf32>}> : () -> tensor<64xf32> + %13 = tosa.conv2d %10, %11, %12 {dilation = array<i64: 1, 1>, group = 1 : i64, pad = array<i64: 3, 3, 0, 0>, stride = array<i64: 1, 1>} : (tensor<1x224x1x3xf32>, tensor<64x7x1x3xf32>, tensor<64xf32>) -> tensor<1x224x1x64xf32> + %14 = tosa.reshape %13 {new_shape = array<i64: 1, 224, 64>} : (tensor<1x224x1x64xf32>) -> tensor<1x224x64xf32> + %15 = tosa.transpose %14, %7 : (tensor<1x224x64xf32>, tensor<3xi32>) -> tensor<1x64x224xf32> + %16 = tosa.add %15, %4 : (tensor<1x64x224xf32>, tensor<1x64x224xf32>) -> tensor<1x64x224xf32> + %17 = tosa.reshape %16 {new_shape = array<i64: 14336>} : (tensor<1x64x224xf32>) -> tensor<14336xf32> + return %17 : tensor<14336xf32> +} + // ----- // CHECK-LABEL: mlir_dot_transpose_add diff --git a/mlir/test/Dialect/MIGraphX/realize-int4.mlir b/mlir/test/Dialect/MIGraphX/realize-int4.mlir index bf6d1637b2ef..ac682ef81b09 100644 --- a/mlir/test/Dialect/MIGraphX/realize-int4.mlir +++ b/mlir/test/Dialect/MIGraphX/realize-int4.mlir @@ -1,21 +1,30 @@ // RUN: rocmlir-opt -migraphx-realize-int4 --split-input-file %s | FileCheck %s -// CHECK-LABEL: @basic_signed +// CHECK-LABEL: @basic_signless // CHECK-SAME: (%[[x:.+]]: !migraphx.shaped<8x4xi4, 4x1>) -> !migraphx.shaped<8x4xi8, 4x1> -func.func @basic_signed(%x: !migraphx.shaped<8x2xi8, 2x1>) -> !migraphx.shaped<8x4xi8, 4x1> { +func.func @basic_signless(%x: !migraphx.shaped<8x2xi8, 2x1>) -> !migraphx.shaped<8x4xi8, 4x1> { // CHECK: %[[extended:.+]] = migraphx.convert %[[x]] : <8x4xi4, 4x1> to <8x4xi8, 4x1> // CHECK: return %[[extended]] - %y = migraphx.unpack %x {axis = 1 : i64, isUnsigned = false} : <8x2xi8, 2x1> -> <8x4xi8, 4x1> + %y = migraphx.unpack %x {axis = 1 : i64} : <8x2xi8, 2x1> -> <8x4xi8, 4x1> func.return %y : !migraphx.shaped<8x4xi8, 4x1> } +// CHECK-LABEL: @basic_signed +// CHECK-SAME: (%[[x:.+]]: !migraphx.shaped<8x4xsi4, 4x1>) -> !migraphx.shaped<8x4xsi8, 4x1> +func.func @basic_signed(%x: !migraphx.shaped<8x2xsi8, 2x1>) -> !migraphx.shaped<8x4xsi8, 4x1> { + // CHECK: %[[extended:.+]] = migraphx.convert %[[x]] : <8x4xsi4, 4x1> to <8x4xsi8, 4x1> + // CHECK: return %[[extended]] + %y = migraphx.unpack %x {axis = 1 : i64} : <8x2xsi8, 2x1> -> <8x4xsi8, 4x1> + func.return %y : !migraphx.shaped<8x4xsi8, 4x1> +} + // CHECK-LABEL: @basic_unsigned -// CHECK-SAME: (%[[x:.+]]: !migraphx.shaped<8x4xi4, 4x1>) -> !migraphx.shaped<8x4xi8, 4x1> -func.func @basic_unsigned(%x: !migraphx.shaped<8x2xi8, 2x1>) -> !migraphx.shaped<8x4xi8, 4x1> { - // CHECK: %[[extended:.+]] = migraphx.convert zero_extend %[[x]] : <8x4xi4, 4x1> to <8x4xi8, 4x1> 
+// CHECK-SAME: (%[[x:.+]]: !migraphx.shaped<8x4xui4, 4x1>) -> !migraphx.shaped<8x4xui8, 4x1> +func.func @basic_unsigned(%x: !migraphx.shaped<8x2xui8, 2x1>) -> !migraphx.shaped<8x4xui8, 4x1> { + // CHECK: %[[extended:.+]] = migraphx.convert %[[x]] : <8x4xui4, 4x1> to <8x4xui8, 4x1> // CHECK: return %[[extended]] - %y = migraphx.unpack %x {axis = 1 : i64, isUnsigned = true} : <8x2xi8, 2x1> -> <8x4xi8, 4x1> - func.return %y : !migraphx.shaped<8x4xi8, 4x1> + %y = migraphx.unpack %x {axis = 1 : i64} : <8x2xui8, 2x1> -> <8x4xui8, 4x1> + func.return %y : !migraphx.shaped<8x4xui8, 4x1> } // CHECK-LABEL: @transpose @@ -25,7 +34,7 @@ func.func @basic_unsigned(%x: !migraphx.shaped<8x2xi8, 2x1>) -> !migraphx.shaped // CHECK: migraphx.convert %[[transposed]] func.func @transposed(%x: !migraphx.shaped<9x2x8xi8, 16x1x2>) -> !migraphx.shaped<9x8x4xi8, 32x4x1> { %transposed = migraphx.transpose %x {permutation = [0, 2, 1]} : <9x2x8xi8, 16x1x2> -> <9x8x2xi8, 16x2x1> - %y = migraphx.unpack %transposed {axis = 2 : i64, isUnsigned = false} : <9x8x2xi8, 16x2x1> -> <9x8x4xi8, 32x4x1> + %y = migraphx.unpack %transposed {axis = 2 : i64} : <9x8x2xi8, 16x2x1> -> <9x8x4xi8, 32x4x1> return %y : !migraphx.shaped<9x8x4xi8, 32x4x1> } @@ -36,7 +45,7 @@ func.func @transposed(%x: !migraphx.shaped<9x2x8xi8, 16x1x2>) -> !migraphx.shape // CHECK: migraphx.convert %[[reshaped]] func.func @reshape_expand(%x: !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<9x2x8xi8, 16x8x1> { %reshaped = migraphx.reshape %x {dims = [9, 2, 4]} : <9x8xi8, 8x1> -> <9x2x4xi8, 8x4x1> - %y = migraphx.unpack %reshaped {axis = 2 : i64, isUnsigned = false} : <9x2x4xi8, 8x4x1> -> <9x2x8xi8, 16x8x1> + %y = migraphx.unpack %reshaped {axis = 2 : i64} : <9x2x4xi8, 8x4x1> -> <9x2x8xi8, 16x8x1> func.return %y : !migraphx.shaped<9x2x8xi8, 16x8x1> } @@ -47,7 +56,7 @@ func.func @reshape_expand(%x: !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped // CHECK: migraphx.convert %[[reshaped]] func.func @reshape_collapse(%x: !migraphx.shaped<9x2x4xi8, 8x4x1>) -> !migraphx.shaped<9x16xi8, 16x1> { %reshaped = migraphx.reshape %x {dims = [9, 8]} : <9x2x4xi8, 8x4x1> -> <9x8xi8, 8x1> - %y = migraphx.unpack %reshaped {axis = 1 : i64, isUnsigned = false} : <9x8xi8, 8x1> -> <9x16xi8, 16x1> + %y = migraphx.unpack %reshaped {axis = 1 : i64} : <9x8xi8, 8x1> -> <9x16xi8, 16x1> func.return %y : !migraphx.shaped<9x16xi8, 16x1> } @@ -58,6 +67,6 @@ func.func @reshape_collapse(%x: !migraphx.shaped<9x2x4xi8, 8x4x1>) -> !migraphx. 
// CHECK: migraphx.convert %[[mbcast]] func.func @multibroadcast(%x: !migraphx.shaped<1x4x1xi8, 1x1x1>) -> !migraphx.shaped<4x8x3xi8, 0x1x0> { %mbcast = migraphx.multibroadcast %x {out_lens = [4, 4, 3]} : <1x4x1xi8, 1x1x1> -> <4x4x3xi8, 0x1x0> - %y = migraphx.unpack %mbcast {axis = 1 : i64, isUnsigned = false} : <4x4x3xi8, 0x1x0> -> <4x8x3xi8, 0x1x0> + %y = migraphx.unpack %mbcast {axis = 1 : i64} : <4x4x3xi8, 0x1x0> -> <4x8x3xi8, 0x1x0> func.return %y : !migraphx.shaped<4x8x3xi8, 0x1x0> } diff --git a/mlir/test/fusion/pr-e2e/mixr-conv1d-small.mlir b/mlir/test/fusion/pr-e2e/mixr-conv1d-small.mlir new file mode 100644 index 000000000000..2e6b6e35f42b --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-conv1d-small.mlir @@ -0,0 +1,8 @@ +// RUN: rocmlir-gen -fut mlir_convolution_add --arch %arch --clone-harness %s | rocmlir-driver -kernel-pipeline=migraphx | rocmlir-driver -host-pipeline=migraphx,highlevel | rocmlir-gen -ph -rand 1 -rand_type float -fut mlir_convolution_add_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal -kernel-pipeline full | xmir-runner --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +func.func private @mlir_convolution_add(%arg0: !migraphx.shaped<1x64x224xf32, 0x1x0>, %arg1: !migraphx.shaped<1x3x224xf32, 672x224x1>, %arg2: !migraphx.shaped<64x3x7xf32, 21x7x1>) -> !migraphx.shaped<1x64x224xf32, 14336x224x1> { + %0 = migraphx.convolution %arg1, %arg2 {dilation = [1], group = 1 : i64, padding = [3, 3], padding_mode = 0 : i64, stride = [1]} : <1x3x224xf32, 672x224x1>, <64x3x7xf32, 21x7x1> -> <1x64x224xf32, 14336x224x1> + %1 = migraphx.add %0, %arg0 : <1x64x224xf32, 14336x224x1>, <1x64x224xf32, 0x1x0> -> <1x64x224xf32, 14336x224x1> + return %1 : !migraphx.shaped<1x64x224xf32, 14336x224x1> +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-sint8-negative-bias.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-sint8-negative-bias.mlir new file mode 100644 index 000000000000..aaefbbaf0f77 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-sint8-negative-bias.mlir @@ -0,0 +1,24 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [0, 3, -2, -1, -1, 4, -9, -1, -2, 3, -6, 0, -3, 2, -1, 4, 4, -10, 4, 1, -11, 2, -5, -8, -9, -8, -2, -1, 3, -9, -1, -9, 3, -1, -5, -1, -4, -11, -7, -8, 1, -7, 
-11, -7, -1, -5, -7, 3, -7] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xsi8, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg2 = migraphx.literal (dense<21> : tensor<1xui8>) : <1xui8, 1> + %arg3 = migraphx.literal (dense<0.1875> : tensor<1x1x7x7xf32>) : <1x1x7x7xf32, 7x7x7x1> + %arg4 = migraphx.literal (dense<-1> : tensor<1x1x7x7xsi8>) : <1x1x7x7xsi8, 7x7x7x1> + %arg5 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xui8, 1> -> <1x1x7x7xui8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xui8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg5 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + %4 = migraphx.quantizelinear %3, %arg3, %arg4 : <1x1x7x7xf32, 49x49x7x1>, <1x1x7x7xf32, 7x7x7x1>, !migraphx.shaped<1x1x7x7xsi8, 7x7x7x1> -> <1x1x7x7xsi8, 49x49x7x1> + return %4 : !migraphx.shaped<1x1x7x7xsi8, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-nobias.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-nobias.mlir new file mode 100644 index 000000000000..1108c388cb92 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-nobias.mlir @@ -0,0 +1,23 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [1, 4, 0, 0, 0, 5, 0, 0, 0, 4, 0, 1, 0, 3, 0, 5, 5, 0, 5, 2, 0, 3, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 4, 0] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 
19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg2 = migraphx.literal (dense<21> : tensor<1xui8>) : <1xui8, 1> + %arg3 = migraphx.literal (dense<0.1875> : tensor<1x1x7x7xf32>) : <1x1x7x7xf32, 7x7x7x1> + %arg5 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xui8, 1> -> <1x1x7x7xui8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xui8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg5 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + %4 = migraphx.quantizelinear %3, %arg3 : <1x1x7x7xf32, 49x49x7x1>, <1x1x7x7xf32, 7x7x7x1> -> <1x1x7x7xui8, 49x49x7x1> + return %4 : !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-overflow.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-overflow.mlir new file mode 100644 index 000000000000..7b669f7c2b08 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-overflow.mlir @@ -0,0 +1,25 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [-4, -1, -6, -5, -5, -1, -13, -5, -6, -1, -10, -4, -7, -2, -5, -1, -1, -14, -1, -3, -15, -2, -9, -12, -13, -12, -6, -5, -1, -13, -5, -13, -1, -5, -9, -5, -8, -15, -11, -12, -3, -11, -15, -11, -5, -9, -11, -1, -11] +// COM: tests fail if they have no arguments, that's why we have %dummy +// COM: Note that the values print as negative because they are displayed as signed integers; TODO: fix this +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : 
<1xf32, 1> + %arg2 = migraphx.literal (dense<21> : tensor<1xui8>) : <1xui8, 1> + %arg3 = migraphx.literal (dense<0.1875> : tensor<1x1x7x7xf32>) : <1x1x7x7xf32, 7x7x7x1> + %arg4 = migraphx.literal (dense<251> : tensor<1x1x7x7xui8>) : <1x1x7x7xui8, 7x7x7x1> + %arg5 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xui8, 1> -> <1x1x7x7xui8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xui8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg5 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + %4 = migraphx.quantizelinear %3, %arg3, %arg4 : <1x1x7x7xf32, 49x49x7x1>, <1x1x7x7xf32, 7x7x7x1>, !migraphx.shaped<1x1x7x7xui8, 7x7x7x1> -> <1x1x7x7xui8, 49x49x7x1> + return %4 : !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-underflow.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-underflow.mlir new file mode 100644 index 000000000000..d5fb8198b45c --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8-underflow.mlir @@ -0,0 +1,24 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [2, 5, 0, 1, 1, 6, 0, 1, 0, 5, 0, 2, 0, 4, 1, 6, 6, 0, 6, 3, 0, 4, 0, 0, 0, 0, 0, 1, 5, 0, 1, 0, 5, 1, 0, 1, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 5, 0] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg2 = migraphx.literal (dense<21> : tensor<1xui8>) : <1xui8, 1> + %arg3 = migraphx.literal (dense<0.1875> : tensor<1x1x7x7xf32>) : <1x1x7x7xf32, 7x7x7x1> + %arg4 = migraphx.literal (dense<1> : tensor<1x1x7x7xui8>) : <1x1x7x7xui8, 7x7x7x1> + %arg5 = migraphx.literal (dense<0.25> 
: tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xui8, 1> -> <1x1x7x7xui8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xui8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg5 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + %4 = migraphx.quantizelinear %3, %arg3, %arg4 : <1x1x7x7xf32, 49x49x7x1>, <1x1x7x7xf32, 7x7x7x1>, !migraphx.shaped<1x1x7x7xui8, 7x7x7x1> -> <1x1x7x7xui8, 49x49x7x1> + return %4 : !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8.mlir new file mode 100644 index 000000000000..a4235d2ec585 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-quant-uint8.mlir @@ -0,0 +1,24 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [19, 22, 17, 18, 18, 23, 10, 18, 17, 22, 13, 19, 16, 21, 18, 23, 23, 9, 23, 20, 8, 21, 14, 11, 10, 11, 17, 18, 22, 10, 18, 10, 22, 18, 14, 18, 15, 8, 12, 11, 20, 12, 8, 12, 18, 14, 12, 22, 12] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg2 = migraphx.literal (dense<21> : tensor<1xui8>) : <1xui8, 1> + %arg3 = migraphx.literal (dense<0.1875> : tensor<1x1x7x7xf32>) : <1x1x7x7xf32, 7x7x7x1> + %arg4 = migraphx.literal (dense<18> : tensor<1x1x7x7xui8>) : <1x1x7x7xui8, 7x7x7x1> + %arg5 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + 
%1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xui8, 1> -> <1x1x7x7xui8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xui8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg5 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + %4 = migraphx.quantizelinear %3, %arg3, %arg4 : <1x1x7x7xf32, 49x49x7x1>, <1x1x7x7xf32, 7x7x7x1>, !migraphx.shaped<1x1x7x7xui8, 7x7x7x1> -> <1x1x7x7xui8, 49x49x7x1> + return %4 : !migraphx.shaped<1x1x7x7xui8, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-sint8-negative-bias.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-sint8-negative-bias.mlir new file mode 100644 index 000000000000..ca68ac0ca17c --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-sint8-negative-bias.mlir @@ -0,0 +1,21 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [1.3125, 0.84375, 1.6875, 1.59375, 1.59375, 0.5625, 3.09375, 1.40625, 1.6875, 0.65625, 2.4375, 1.3125, 1.96875, 0.9375, 1.5, 0.5625, 0.5625, 3.1875, 0.5625, 1.03125, 3.375, 0.9375, 2.34375, 2.8125, 3.09375, 2.8125, 1.6875, 1.59375, 0.65625, 3.09375, 1.40625, 2.90625, 0.84375, 1.40625, 2.34375, 1.40625, 2.0625, 3.28125, 2.625, 2.8125, 1.03125, 2.53125, 3.375, 2.71875, 1.40625, 2.25, 2.625, 0.84375, 2.53125] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xf32, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[-7, -12, -3, -4, -4, -15, 12, -6, -3, -14, 5, -7, 0, -11, -5, -15, -15, 13, -15, -10, 15, -11, 4, 9, 12, 9, -3, -4, -14, 12, -6, 10, -12, -6, 4, -6, 1, 14, 7, 9, -10, 6, 15, 8, -6, 3, 7, -12, 6]> : tensor<49xsi8>) : <49xsi8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg2 = migraphx.literal (dense<-21> : tensor<1xsi8>) : <1xsi8, 1> + %arg3 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xsi8, 1> -> <1x1x7x7xsi8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xsi8, 1> -> <1x1x7x7xsi8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xsi8, 
49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xsi8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg3 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + return %3 : !migraphx.shaped<1x1x7x7xf32, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-uint8-nobias.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-uint8-nobias.mlir new file mode 100644 index 000000000000..8732e6857460 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-uint8-nobias.mlir @@ -0,0 +1,19 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [2.15625, 2.625, 1.78125, 1.875, 1.875, 2.90625, 0.375, 2.0625, 1.78125, 2.8125, 1.03125, 2.15625, 1.5, 2.53125, 1.96875, 2.90625, 2.90625, 0.28125, 2.90625, 2.4375, 0.09375, 2.53125, 1.125, 0.65625, 0.375, 0.65625, 1.78125, 1.875, 2.8125, 0.375, 2.0625, 0.5625, 2.625, 2.0625, 1.125, 2.0625, 1.40625, 0.1875, 0.84375, 0.65625, 2.4375, 0.9375, 0.09375, 0.75, 2.0625, 1.21875, 0.84375, 2.625, 0.9375] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xf32, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg3 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg3 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + return %3 : !migraphx.shaped<1x1x7x7xf32, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dequant-conv-uint8.mlir b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-uint8.mlir new file mode 100644 index 000000000000..72f8520e0723 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dequant-conv-uint8.mlir @@ -0,0 +1,21 
@@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_dequantizelinear_convolution_quantizelinear --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -print-results -ph -fut mlir_dequantizelinear_convolution_quantizelinear_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// CHECK-NEXT: Unranked Memref base@ = {{.*}} rank = 1 offset = 0 sizes = [49] strides = [1] data = +// CHECK-NEXT: [0.1875, 0.65625, -0.1875, -0.09375, -0.09375, 0.9375, -1.59375, 0.09375, -0.1875, 0.84375, -0.9375, 0.1875, -0.46875, 0.5625, 0, 0.9375, 0.9375, -1.6875, 0.9375, 0.46875, -1.875, 0.5625, -0.84375, -1.3125, -1.59375, -1.3125, -0.1875, -0.09375, 0.84375, -1.59375, 0.09375, -1.40625, 0.65625, 0.09375, -0.84375, 0.09375, -0.5625, -1.78125, -1.125, -1.3125, 0.46875, -1.03125, -1.875, -1.21875, 0.09375, -0.75, -1.125, 0.65625, -1.03125] +// COM: tests fail if they have no arguments, that's why we have %dummy +module { + func.func @mlir_dequantizelinear_convolution_quantizelinear(%dummy : !migraphx.shaped<9x8xi8, 8x1>) -> !migraphx.shaped<1x1x7x7xf32, 49x49x7x1> { + %arg0 = migraphx.literal (dense<[23, 28, 19, 20, 20, 31, 4, 22, 19, 30, 11, 23, 16, 27, 21, 31, 31, 3, 31, 26, 1, 27, 12, 7, 4, 7, 19, 20, 30, 4, 22, 6, 28, 22, 12, 22, 15, 2, 9, 7, 26, 10, 1, 8, 22, 13, 9, 28, 10]> : tensor<49xui8>) : <49xui8, 1> + %arg1 = migraphx.literal (dense<0.375> : tensor<1xf32>) : <1xf32, 1> + %arg2 = migraphx.literal (dense<21> : tensor<1xui8>) : <1xui8, 1> + %arg3 = migraphx.literal (dense<0.25> : tensor<1x1x1x1xf32>) : <1x1x1x1xf32, 1x1x1x1> + + %arg0_reshaped = migraphx.reshape %arg0 {dims = [1, 1, 7, 7]} : <49xui8, 1> -> <1x1x7x7xui8, 49x49x7x1> + %0 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xf32, 1> -> <1x1x7x7xf32, 0x0x0x0> + %1 = migraphx.multibroadcast %arg2 {out_dyn_dims = [], out_lens = [1, 1, 7, 7]} : <1xui8, 1> -> <1x1x7x7xui8, 0x0x0x0> + %2 = migraphx.dequantizelinear %arg0_reshaped, %0, %1 : <1x1x7x7xui8, 49x49x7x1>, <1x1x7x7xf32, 0x0x0x0>, !migraphx.shaped<1x1x7x7xui8, 0x0x0x0> -> <1x1x7x7xf32, 49x49x7x1> + %3 = migraphx.convolution %2, %arg3 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : <1x1x7x7xf32, 49x49x7x1>, <1x1x1x1xf32, 1x1x1x1> -> <1x1x7x7xf32, 49x49x7x1> + return %3 : !migraphx.shaped<1x1x7x7xf32, 49x49x7x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16-1645.mlir b/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16-1645.mlir index 1b8d21c8f4e1..725e19176027 100644 --- a/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16-1645.mlir +++ b/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16-1645.mlir @@ -5,8 +5,8 @@ module { func.func @mlir_unpack_int4_1645(%arg0: !migraphx.shaped<2x2xi8, 2x1>, %arg1: !migraphx.shaped<2x2x1x1x1x1xf16, 2x1x1x1x1x1>, %arg2: !migraphx.shaped<2x1xi8, 1x1>, %arg3: !migraphx.shaped<2x4xf16, 4x1>) -> !migraphx.shaped<4x4xf16, 4x1> // attributes {arch = "gfx90a:sramecc+:xnack-", kernel = "mixr", num_cu = 110 : i64} { - %0 = migraphx.unpack %arg0 {axis = 
1 : i64, isUnsigned = false} : <2x2xi8, 2x1> -> <2x4xi8, 4x1> - %1 = migraphx.unpack %arg2 {axis = 1 : i64, isUnsigned = false} : <2x1xi8, 1x1> -> <2x2xi8, 2x1> + %0 = migraphx.unpack %arg0 {axis = 1 : i64} : <2x2xi8, 2x1> -> <2x4xi8, 4x1> + %1 = migraphx.unpack %arg2 {axis = 1 : i64} : <2x1xi8, 1x1> -> <2x2xi8, 2x1> %2 = migraphx.reshape %arg1 {dims = [2, 2, 1, 1, 1, 1, 1]} : <2x2x1x1x1x1xf16, 2x1x1x1x1x1> -> <2x2x1x1x1x1x1xf16, 2x1x1x1x1x1x1> %3 = migraphx.multibroadcast %2 {out_dyn_dims = [], out_lens = [2, 2, 1, 1, 1, 1, 2]} : <2x2x1x1x1x1x1xf16, 2x1x1x1x1x1x1> -> <2x2x1x1x1x1x2xf16, 2x1x1x1x1x1x0> %4 = migraphx.reshape %3 {dims = [2, 4]} : <2x2x1x1x1x1x2xf16, 2x1x1x1x1x1x0> -> <2x4xf16, 4x1> diff --git a/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16.mlir b/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16.mlir index 983930b4194e..927ddcfff760 100644 --- a/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16.mlir +++ b/mlir/test/fusion/pr-e2e/mixr-dot-int4-f16.mlir @@ -9,7 +9,7 @@ func.func private @mlir_unpack_dequantizelinear_dot(%arg0: !migraphx.shaped<1x4x8xi8, 32x8x1>, %arg1: !migraphx.shaped<1x16x4xf16, 64x4x1>) -> !migraphx.shaped<1x4x4xf16, 16x4x1> { %0 = migraphx.literal (dense<[0.25]> : tensor<1xf16>) : <1xf16, 0> %1 = migraphx.multibroadcast %0 {out_dyn_dims = [], out_lens = [1, 5, 16]} : <1xf16, 0> -> <1x4x16xf16, 0x0x0> - %2 = migraphx.unpack %arg0 {axis = 2 : i64, isUnsigned = false} : <1x4x8xi8, 32x8x1> -> <1x4x16xi8, 64x16x1> + %2 = migraphx.unpack %arg0 {axis = 2 : i64} : <1x4x8xi8, 32x8x1> -> <1x4x16xi8, 64x16x1> %3 = migraphx.dequantizelinear %2, %1 : <1x4x16xi8, 64x16x1>, <1x4x16xf16, 0x0x0> -> <1x4x16xf16, 64x16x1> %4 = migraphx.dot %3, %arg1 : <1x4x16xf16, 64x16x1>, <1x16x4xf16, 64x4x1> -> <1x4x4xf16, 16x4x1> return %4 : !migraphx.shaped<1x4x4xf16, 16x4x1> diff --git a/mlir/test/fusion/pr-e2e/mixr-dot-uint4-f16-uint32.mlir b/mlir/test/fusion/pr-e2e/mixr-dot-uint4-f16-uint32.mlir new file mode 100644 index 000000000000..18e26007a90e --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dot-uint4-f16-uint32.mlir @@ -0,0 +1,28 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_unpack_uint4_f16_uint32 --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -ph -fut mlir_unpack_uint4_f16_uint32_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// COM: Runs the MIGraphX pipeline first to rewrite out the int4 +module { + func.func @mlir_unpack_uint4_f16_uint32(%arg0: !migraphx.shaped<2x2xui8, 2x1>, %arg1: !migraphx.shaped<2x2x1x1x1x1xf16, 2x1x1x1x1x1>, %arg2: !migraphx.shaped<2x1xui8, 1x1>, %arg3: !migraphx.shaped<2x4xf16, 4x1>) -> !migraphx.shaped<4x4xf16, 4x1> // attributes {arch = "gfx90a:sramecc+:xnack-", kernel = "mixr", num_cu = 110 : i64} + { + %0 = migraphx.unpack %arg0 {axis = 1 : i64} : <2x2xui8, 2x1> -> <2x4xui8, 4x1> + %1 = migraphx.unpack %arg2 {axis = 1 : i64} : <2x1xui8, 1x1> -> <2x2xui8, 2x1> + %2 = migraphx.reshape %arg1 {dims = [2, 2, 1, 1, 1, 1, 1]} : <2x2x1x1x1x1xf16, 2x1x1x1x1x1> -> <2x2x1x1x1x1x1xf16, 2x1x1x1x1x1x1> + %3 = 
migraphx.multibroadcast %2 {out_dyn_dims = [], out_lens = [2, 2, 1, 1, 1, 1, 2]} : <2x2x1x1x1x1x1xf16, 2x1x1x1x1x1x1> -> <2x2x1x1x1x1x2xf16, 2x1x1x1x1x1x0> + %4 = migraphx.reshape %3 {dims = [2, 4]} : <2x2x1x1x1x1x2xf16, 2x1x1x1x1x1x0> -> <2x4xf16, 4x1> + %5 = migraphx.reshape %1 {dims = [2, 2, 1]} : <2x2xui8, 2x1> -> <2x2x1xui8, 2x1x1> + %6 = migraphx.multibroadcast %5 {out_dyn_dims = [], out_lens = [2, 2, 2]} : <2x2x1xui8, 2x1x1> -> <2x2x2xui8, 2x1x0> + %7 = migraphx.reshape %6 {dims = [2, 4]} : <2x2x2xui8, 2x1x0> -> <2x4xui8, 4x1> + %8 = migraphx.dequantizelinear %0, %4, %7 : <2x4xui8, 4x1>, <2x4xf16, 4x1>, !migraphx.shaped<2x4xui8, 4x1> -> <2x4xf16, 4x1> + %9 = migraphx.transpose %8 {permutation = [1, 0]} : <2x4xf16, 4x1> -> <4x2xf16, 1x4> + %10 = migraphx.dot %9, %arg3 : <4x2xf16, 1x4>, <2x4xf16, 4x1> -> <4x4xf16, 4x1> + %11 = migraphx.relu %10 : <4x4xf16, 4x1> -> <4x4xf16, 4x1> + %12 = migraphx.convert %11 : <4x4xf16, 4x1> to <4x4xui32, 4x1> + %13 = migraphx.literal (dense<1> : tensor<1xui8>) : <1xui8, 0> + %14 = migraphx.multibroadcast %13 {out_dyn_dims = [], out_lens = [4, 4]} : <1xui8, 0> -> <4x4xui8, 0x0> + %15 = migraphx.literal (dense<1.0> : tensor<1xf16>) : <1xf16, 0> + %16 = migraphx.multibroadcast %15 {out_dyn_dims = [], out_lens = [4, 4]} : <1xf16, 0> -> <4x4xf16, 0x0> + %17 = migraphx.dequantizelinear %12, %16, %14 : <4x4xui32, 4x1>, <4x4xf16, 0x0>, !migraphx.shaped<4x4xui8, 0x0> -> <4x4xf16, 4x1> + return %17 : !migraphx.shaped<4x4xf16, 4x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-dot-uint4-f16.mlir b/mlir/test/fusion/pr-e2e/mixr-dot-uint4-f16.mlir new file mode 100644 index 000000000000..c0af377c3ff4 --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-dot-uint4-f16.mlir @@ -0,0 +1,21 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut mlir_unpack_uint4_f16 --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -ph -fut mlir_unpack_uint4_f16_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +// COM: Runs the MIGraphX pipeline first to rewrite out the int4 +module { + func.func @mlir_unpack_uint4_f16(%arg0: !migraphx.shaped<2x2xui8, 2x1>, %arg1: !migraphx.shaped<2x2x1x1x1x1xf16, 2x1x1x1x1x1>, %arg2: !migraphx.shaped<2x1xui8, 1x1>, %arg3: !migraphx.shaped<2x4xf16, 4x1>) -> !migraphx.shaped<4x4xf16, 4x1> // attributes {arch = "gfx90a:sramecc+:xnack-", kernel = "mixr", num_cu = 110 : i64} + { + %0 = migraphx.unpack %arg0 {axis = 1 : i64} : <2x2xui8, 2x1> -> <2x4xui8, 4x1> + %1 = migraphx.unpack %arg2 {axis = 1 : i64} : <2x1xui8, 1x1> -> <2x2xui8, 2x1> + %2 = migraphx.reshape %arg1 {dims = [2, 2, 1, 1, 1, 1, 1]} : <2x2x1x1x1x1xf16, 2x1x1x1x1x1> -> <2x2x1x1x1x1x1xf16, 2x1x1x1x1x1x1> + %3 = migraphx.multibroadcast %2 {out_dyn_dims = [], out_lens = [2, 2, 1, 1, 1, 1, 2]} : <2x2x1x1x1x1x1xf16, 2x1x1x1x1x1x1> -> <2x2x1x1x1x1x2xf16, 2x1x1x1x1x1x0> + %4 = migraphx.reshape %3 {dims = [2, 4]} : <2x2x1x1x1x1x2xf16, 2x1x1x1x1x1x0> -> <2x4xf16, 4x1> + %5 = migraphx.reshape %1 {dims = [2, 2, 1]} : <2x2xui8, 2x1> -> <2x2x1xui8, 2x1x1> + %6 = 
migraphx.multibroadcast %5 {out_dyn_dims = [], out_lens = [2, 2, 2]} : <2x2x1xui8, 2x1x1> -> <2x2x2xui8, 2x1x0> + %7 = migraphx.reshape %6 {dims = [2, 4]} : <2x2x2xui8, 2x1x0> -> <2x4xui8, 4x1> + %8 = migraphx.dequantizelinear %0, %4, %7 : <2x4xui8, 4x1>, <2x4xf16, 4x1>, !migraphx.shaped<2x4xui8, 4x1> -> <2x4xf16, 4x1> + %9 = migraphx.transpose %8 {permutation = [1, 0]} : <2x4xf16, 4x1> -> <4x2xf16, 1x4> + %10 = migraphx.dot %9, %arg3 : <4x2xf16, 1x4>, <2x4xf16, 4x1> -> <4x4xf16, 4x1> + return %10 : !migraphx.shaped<4x4xf16, 4x1> + } +} diff --git a/mlir/test/fusion/pr-e2e/mixr-uint-division.mlir b/mlir/test/fusion/pr-e2e/mixr-uint-division.mlir new file mode 100644 index 000000000000..b02d64b7383e --- /dev/null +++ b/mlir/test/fusion/pr-e2e/mixr-uint-division.mlir @@ -0,0 +1,24 @@ +// RUN: rocmlir-driver -kernel-pipeline=migraphx %s | rocmlir-gen -fut migraphx_div_ui32 --arch %arch --clone-harness - | rocmlir-driver -host-pipeline=highlevel | rocmlir-gen -ph -fut migraphx_div_ui32_wrapper --verifier clone - | rocmlir-driver -host-pipeline mhal,runner -kernel-pipeline full | mlir-cpu-runner -O2 --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext,%conv_validation_wrapper_library_dir/libconv-validation-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_float16_utils%shlibext,%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext,%linalg_test_lib_dir/libmlir_async_runtime%shlibext --entry-point-result=void | FileCheck %s +// ALLOW_RETRIES: 2 +// CHECK: [1 1 1] +module { + + func.func @migraphx_div_ui32(%arg0: !migraphx.shaped<1x2048x1x1xf32, 2048x1x1x1>, %arg1: !migraphx.shaped<1x2048x1x1xf32, 2048x1x1x1>, %arg2: !migraphx.shaped<1x2048x7x7xf32, 100352x49x7x1>, %arg3: !migraphx.shaped<1x2048x1x1xf32, 2048x1x1x1>, %arg4: !migraphx.shaped<1x1024x14x14xf32, 200704x196x14x1>, %arg5: !migraphx.shaped<2048x1024x1x1xf32, 1024x1x1x1>) -> !migraphx.shaped<1x2048x7x7xui32, 100352x49x7x1> { + %0 = migraphx.multibroadcast %arg3 {out_dyn_dims = [], out_lens = [1, 2048, 7, 7]} : <1x2048x1x1xf32, 2048x1x1x1> -> <1x2048x7x7xf32, 0x1x0x0> + %1 = migraphx.multibroadcast %arg1 {out_dyn_dims = [], out_lens = [1, 2048, 7, 7]} : <1x2048x1x1xf32, 2048x1x1x1> -> <1x2048x7x7xf32, 0x1x0x0> + %2 = migraphx.multibroadcast %arg0 {out_dyn_dims = [], out_lens = [1, 2048, 7, 7]} : <1x2048x1x1xf32, 2048x1x1x1> -> <1x2048x7x7xf32, 0x1x0x0> + %3 = migraphx.convolution %arg4, %arg5 {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [2, 2]} : <1x1024x14x14xf32, 200704x196x14x1>, <2048x1024x1x1xf32, 1024x1x1x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %4 = migraphx.mul %2, %3 : <1x2048x7x7xf32, 0x1x0x0>, <1x2048x7x7xf32, 100352x49x7x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %5 = migraphx.mul %1, %4 : <1x2048x7x7xf32, 0x1x0x0>, <1x2048x7x7xf32, 100352x49x7x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %6 = migraphx.mul %2, %arg2 : <1x2048x7x7xf32, 0x1x0x0>, <1x2048x7x7xf32, 100352x49x7x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %7 = migraphx.mul %1, %6 : <1x2048x7x7xf32, 0x1x0x0>, <1x2048x7x7xf32, 100352x49x7x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %8 = migraphx.add %7, %5 : <1x2048x7x7xf32, 100352x49x7x1>, <1x2048x7x7xf32, 100352x49x7x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %9 = migraphx.add %8, %0 : <1x2048x7x7xf32, 100352x49x7x1>, <1x2048x7x7xf32, 0x1x0x0> -> <1x2048x7x7xf32, 100352x49x7x1> + %10 = migraphx.literal (dense<[2]> : tensor<1xui32>) : <1xui32, 0> + %11 = migraphx.multibroadcast %10 {out_dyn_dims = [], out_lens = [1, 2048, 7, 7]} : 
<1xui32, 0> -> <1x2048x7x7xui32, 0x0x0x0> + %12 = migraphx.relu %9 : <1x2048x7x7xf32, 100352x49x7x1> -> <1x2048x7x7xf32, 100352x49x7x1> + %13 = migraphx.convert %12 : <1x2048x7x7xf32, 100352x49x7x1> to <1x2048x7x7xui32, 100352x49x7x1> + %14 = migraphx.div %13, %11 : <1x2048x7x7xui32, 100352x49x7x1>, <1x2048x7x7xui32, 0x0x0x0> -> <1x2048x7x7xui32, 100352x49x7x1> + return %14 : !migraphx.shaped<1x2048x7x7xui32, 100352x49x7x1> + } +}
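
// COM: For reference, a minimal sketch (not part of the patch) of the lowering contract the
// COM: tests above exercise: MIXRShapedToTensorConverter strips signedness, so ui32 becomes
// COM: signless i32 before TOSA, and any op whose behavior differs between signed and unsigned
// COM: (division here; casts via "unsigned_cast" elsewhere) is carried through a rocmlir
// COM: tosa.custom op that --rocmlir-custom-tosa-to-linalg rewrites to the matching arith op.
// COM: The function name below is hypothetical; the tosa.custom form mirrors @unsigned_div in
// COM: rocmlir-custom-tosa-to-linalg.mlir.
func.func @unsigned_div_sketch(%a: tensor<4xi32>, %b: tensor<4xi32>) -> tensor<4xi32> {
  // Signless i32 can no longer express "unsigned"; the semantics survive in operator_name.
  %q = tosa.custom %a, %b {domain_name = "rocmlir", implementation_attrs = "", operator_name = "unsigned_div"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<4xi32>
  // RocmlirCustomTosaToLinalg later lowers this to a linalg.generic whose body is arith.divui.
  return %q : tensor<4xi32>
}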