@@ -49,6 +49,38 @@ class MklTestingUtil {
     *tensor_min = min();
     *tensor_max = max();
   }
+
+  // This utility function mimics quantization of a float/bfloat16 tensor with
+  // the oneDNN backend QuantizeV2 operation. Since the op signature requires
+  // min and max values to be of float type, min_tensor and max_tensor should
+  // have their dtype set to DT_FLOAT.
+  template <typename T>
+  static Status GetQuantizationTensors(const Tensor& input, Tensor* output,
+                                       DataType out_type, const string mode,
+                                       Tensor* min_tensor, Tensor* max_tensor) {
+    if (min_tensor->dtype() != DT_FLOAT || max_tensor->dtype() != DT_FLOAT) {
+      return absl::UnimplementedError("Tensor must be float32.");
+    }
+    T min;
+    T max;
+    ComputeMinMax<T>(input, &min, &max);
+
+    float adjusted_min = static_cast<float>(min);
+    float adjusted_max = static_cast<float>(max);
+    if (mode == "SCALED") {
+      if (output->dtype() != DT_QINT8) {
+        return absl::UnimplementedError("Tensor must be QInt8 in SCALED mode.");
+      }
+      float range = std::max(std::abs(adjusted_min), std::abs(adjusted_max));
+      adjusted_min = -range;
+      adjusted_max = range;
+    }
+    RunMklQuantizeOp(input, adjusted_min, adjusted_max, out_type, mode, output);
+    min_tensor->flat<float>()(0) = adjusted_min;
+    max_tensor->flat<float>()(0) = adjusted_max;
+
+    return OkStatus();
+  }
 };
 
 #ifdef ENABLE_ONEDNN_V3
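For context, here is a hedged usage sketch of the new helper (hypothetical test code, not part of this change; test::FillValues and TF_ASSERT_OK are TensorFlow test utilities). In SCALED mode the helper symmetrizes the range around zero, so an input spanning [-1.0, 3.0] is quantized against [-3.0, 3.0], and both min_tensor and max_tensor must be scalar DT_FLOAT tensors that receive the adjusted range:

// Hypothetical usage sketch; shapes and values are illustrative only.
Tensor input(DT_FLOAT, TensorShape({2, 2}));
test::FillValues<float>(&input, {-1.0f, 0.5f, 1.5f, 3.0f});

Tensor output(DT_QINT8, TensorShape({2, 2}));
Tensor min_tensor(DT_FLOAT, TensorShape({}));  // Receives adjusted min, -3.0f.
Tensor max_tensor(DT_FLOAT, TensorShape({}));  // Receives adjusted max, 3.0f.

// SCALED mode requires a QInt8 output tensor; other modes such as MIN_FIRST
// would pass the raw min/max through unchanged.
TF_ASSERT_OK(MklTestingUtil::GetQuantizationTensors<float>(
    input, &output, DT_QINT8, "SCALED", &min_tensor, &max_tensor));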