Skip to content

Commit 7db3f35

Browse files
marialyu and tensorflower-gardener
authored and committed
Add int8/int16 support for SQRT op to AEQ
PiperOrigin-RevId: 785924220
1 parent 7f81be4 commit 7db3f35

File tree

10 files changed

+181
-9
lines changed

10 files changed

+181
-9
lines changed

RELEASE.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919

2020
* <IF RELEASE CONTAINS MULTIPLE FEATURES FROM SAME AREA, GROUP THEM TOGETHER>
2121

22+
* `tf.lite`
23+
* Adds int8 and int16x8 support for SQRT operator.
24+
2225
### Bug Fixes and Other Changes
2326

2427
* <SIMILAR TO ABOVE SECTION, BUT FOR OTHER IMPORTANT CHANGES / BUG FIXES>

tensorflow/compiler/mlir/lite/ir/tfl_ops.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@ INFER_RETURN_TYPE_COMPONENTS_FROM_OPERANDS(LocalResponseNormalizationOp);
104104
INFER_RETURN_TYPE_COMPONENTS_FROM_OPERANDS(NegOp);
105105
INFER_RETURN_TYPE_COMPONENTS_FROM_OPERANDS(RoundOp);
106106
INFER_RETURN_TYPE_COMPONENTS_FROM_OPERANDS(SinOp);
107-
INFER_RETURN_TYPE_COMPONENTS_FROM_OPERANDS(SqrtOp);
108107
INFER_RETURN_TYPE_COMPONENTS_FROM_OPERANDS(SquareOp);
109108
// go/keep-sorted end
110109

tensorflow/compiler/mlir/lite/ir/tfl_ops.td

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3401,19 +3401,29 @@ def TFL_SoftmaxOp : TFL_Op<"softmax", [
34013401

34023402
def TFL_SqrtOp: TFL_Op<"sqrt", [
34033403
Pure,
3404-
TF_SameOperandsAndResultTypeResolveRef]> {
3404+
QuantizableResult,
3405+
TFL_SameFirstOperandAndFirstResultElementType,
3406+
SameOperandsAndResultShape]> {
34053407
let summary = "Square root operator";
34063408

34073409
let description = [{
34083410
Computes element-wise Square root of input
34093411
}];
34103412

3411-
let arguments = (ins TFL_FpTensor:$x);
3413+
let arguments = (ins TFL_TensorOf<[F32, QI8, QI16]>:$x);
34123414

3413-
let results = (outs TFL_FpTensor:$y);
3415+
let results = (outs TFL_TensorOf<[F32, QI8, QI16]>:$y);
34143416

34153417
let hasFolder = 1;
34163418

3419+
let builders = [
3420+
OpBuilder<(ins "Value":$input),
3421+
[{
3422+
$_state.addOperands({input});
3423+
$_state.addTypes(input.getType());
3424+
}]>
3425+
];
3426+
34173427
let extraClassDeclaration = [{
34183428
// Returns whether the return types are compatible.
34193429
static bool isCompatibleReturnTypes(TypeRange l, TypeRange r) {

tensorflow/lite/core/kernels/register.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,9 @@ BuiltinOpResolver::BuiltinOpResolver() {
250250
AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(),
251251
/* min_version = */ 1,
252252
/* max_version = */ 3);
253-
AddBuiltin(BuiltinOperator_SQRT, Register_SQRT());
253+
AddBuiltin(BuiltinOperator_SQRT, Register_SQRT(),
254+
/* min_version = */ 1,
255+
/* max_version = */ 2);
254256
AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT(),
255257
/* min_version = */ 1,
256258
/* max_version = */ 3);

tensorflow/lite/kernels/elementwise.cc

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ bool IsRsqrtSupportedType(const TfLiteType type) {
7070
return type == kTfLiteFloat32 || type == kTfLiteInt8 || type == kTfLiteInt16;
7171
}
7272

73+
bool IsSqrtSupportedType(const TfLiteType type) {
74+
return type == kTfLiteFloat32 || type == kTfLiteInt8 || type == kTfLiteInt16;
75+
}
76+
7377
bool IsLogSupportedType(const TfLiteType type) {
7478
return type == kTfLiteFloat32 || type == kTfLiteInt8 || type == kTfLiteInt16;
7579
}
@@ -354,8 +358,59 @@ TfLiteStatus LogEval(TfLiteContext* context, TfLiteNode* node) {
354358
}
355359
}
356360

361+
template <typename T>
362+
TfLiteStatus SqrtEvalQuantized(TfLiteContext* context, TfLiteNode* node) {
363+
const TfLiteTensor* input;
364+
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
365+
TfLiteTensor* output;
366+
TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
367+
368+
const auto* input_params =
369+
reinterpret_cast<TfLiteAffineQuantization*>(input->quantization.params);
370+
const auto* output_params =
371+
reinterpret_cast<TfLiteAffineQuantization*>(output->quantization.params);
372+
const float input_scale = input_params->scale->data[0];
373+
const int input_zp = input_params->zero_point->data[0];
374+
const float output_scale = output_params->scale->data[0];
375+
const int output_zp = output_params->zero_point->data[0];
376+
377+
const int64_t num_elements = NumElements(input);
378+
const T* in_data = GetTensorData<T>(input);
379+
T* out_data = GetTensorData<T>(output);
380+
381+
const int kMin = std::numeric_limits<T>::min();
382+
const int kMax = std::numeric_limits<T>::max();
383+
384+
for (int64_t i = 0; i < num_elements; ++i) {
385+
const float dequantized_input =
386+
input_scale * (static_cast<int>(in_data[i]) - input_zp);
387+
TF_LITE_ENSURE_MSG(context, dequantized_input >= 0.0f,
388+
"Sqrt is only defined for non-negative values");
389+
const float float_output = std::sqrt(dequantized_input);
390+
const int quantized_output =
391+
static_cast<int>(float_output / output_scale) + output_zp;
392+
out_data[i] =
393+
static_cast<T>(std::min(std::max(quantized_output, kMin), kMax));
394+
}
395+
return kTfLiteOk;
396+
}
397+
357398
TfLiteStatus SqrtEval(TfLiteContext* context, TfLiteNode* node) {
358-
return EvalNumeric(context, node, std::sqrt);
399+
const TfLiteTensor* input;
400+
TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
401+
const TfLiteType type = input->type;
402+
switch (type) {
403+
case kTfLiteFloat32:
404+
return EvalNumeric(context, node, std::sqrt);
405+
case kTfLiteInt8:
406+
return SqrtEvalQuantized<int8_t>(context, node);
407+
case kTfLiteInt16:
408+
return SqrtEvalQuantized<int16_t>(context, node);
409+
default:
410+
TF_LITE_KERNEL_LOG(context, "Current data type %s is not supported.",
411+
TfLiteTypeGetName(type));
412+
return kTfLiteError;
413+
}
359414
}
360415

361416
TfLiteStatus RsqrtEvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
@@ -494,10 +549,11 @@ TfLiteRegistration* Register_LOG() {
494549
return &r;
495550
}
496551

497-
GENERIC_PREPARE(PrepareSqrt, elementwise::IsNumericSupportedType, "Sqrt")
552+
GENERIC_PREPARE(PrepareSqrt, elementwise::IsSqrtSupportedType, "Sqrt")
498553

499554
TfLiteRegistration* Register_SQRT() {
500-
static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
555+
static TfLiteRegistration r = {elementwise::ElementWiseQuantizedInit,
556+
elementwise::ElementWiseQuantizedFree,
501557
PrepareSqrt, elementwise::SqrtEval};
502558
return &r;
503559
}

tensorflow/lite/kernels/elementwise_test.cc

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,91 @@ TEST(ElementWise, Sqrt) {
344344
EXPECT_THAT(m.GetTensorShape(m.output()), ElementsAreArray({1, 1, 4, 1}));
345345
}
346346

347+
TEST(ElementWise, SqrtInt8) {
348+
const std::vector<float> input_data = {0, 1, 2, 9, 16, 25, 1.44, 0.5};
349+
std::vector<float> expected_output(input_data.size());
350+
for (int i = 0; i < expected_output.size(); i++) {
351+
expected_output[i] = std::sqrt(input_data[i]);
352+
}
353+
const std::vector<int> shape = {1, 8};
354+
float kInputScale = 25.0 / 255.0;
355+
float kOutputScale = 5.0 / 255.0;
356+
int32_t zero_point = -128;
357+
ElementWiseOpQuantizedModel m(
358+
BuiltinOperator_SQRT,
359+
/*input_tensor_data=*/
360+
{/*type=*/TensorType_INT8,
361+
/*shape=*/shape,
362+
/*min=*/0,
363+
/*max=*/25.0,
364+
/*scale=*/kInputScale,
365+
/*zero_point=*/zero_point,
366+
/*per_channel_quantization=*/true,
367+
/*per_channel_quantization_scales=*/{kInputScale},
368+
/*per_channel_quantization_offsets=*/{zero_point}},
369+
/*output_tensor_data=*/
370+
{/*type=*/TensorType_INT8,
371+
/*shape=*/shape,
372+
/*min=*/0,
373+
/*max=*/5.0,
374+
/*scale=*/kOutputScale,
375+
/*zero_point=*/zero_point,
376+
/*per_channel_quantization=*/true,
377+
/*per_channel_quantization_scales=*/{kOutputScale},
378+
/*per_channel_quantization_offsets=*/{zero_point}});
379+
m.QuantizeAndPopulate<int8_t>(m.input(), input_data);
380+
ASSERT_EQ(m.Invoke(), kTfLiteOk);
381+
EXPECT_THAT(m.ExtractDequantVector<int8_t>(m.output()),
382+
ElementsAreArray(ArrayFloatNear(expected_output, kInputScale)));
383+
}
384+
385+
TEST(ElementWise, SqrtNegativeInt8) {
386+
const std::vector<float> input_data = {-1.0};
387+
float kInputScale = 1.0 / 255.0;
388+
float kOutputScale = 1.0 / 255.0;
389+
int32_t zero_point = 0;
390+
ElementWiseOpQuantizedModel m(BuiltinOperator_SQRT,
391+
{TensorType_INT8,
392+
{1, 1},
393+
0,
394+
1.0,
395+
kInputScale,
396+
zero_point,
397+
true,
398+
{kInputScale},
399+
{zero_point}},
400+
{TensorType_INT8,
401+
{1, 1},
402+
0,
403+
1.0,
404+
kOutputScale,
405+
zero_point,
406+
true,
407+
{kOutputScale},
408+
{zero_point}});
409+
m.QuantizeAndPopulate<int8_t>(m.input(), input_data);
410+
EXPECT_EQ(m.Invoke(), kTfLiteError);
411+
}
412+
413+
TEST(ElementWise, SqrtInt16) {
414+
const std::vector<float> input_data = {0, 1, 2, 9, 16, 25, 1.44, 0.5};
415+
std::vector<float> expected_output(input_data.size());
416+
for (int i = 0; i < expected_output.size(); i++) {
417+
expected_output[i] = std::sqrt(input_data[i]);
418+
}
419+
420+
const float kQuantizedTolerance = GetQuantizationStep<int16_t>(-25, 25);
421+
422+
ElementWiseOpQuantizedModel m(BuiltinOperator_SQRT,
423+
{TensorType_INT16, {1, 8}, -25, 25},
424+
{TensorType_INT16, {1, 8}, -5, 5});
425+
m.QuantizeAndPopulate<int16_t>(m.input(), input_data);
426+
ASSERT_EQ(m.Invoke(), kTfLiteOk);
427+
EXPECT_THAT(
428+
m.ExtractDequantVector<int16_t>(m.output()),
429+
ElementsAreArray(ArrayFloatNear(expected_output, kQuantizedTolerance)));
430+
}
431+
347432
TEST(ElementWise, Rsqrt) {
348433
ElementWiseOpFloatModel m(BuiltinOperator_RSQRT, {1, 1, 4, 1});
349434
m.PopulateTensor<float>(m.input(), {1, 2, 4, 9});

tensorflow/lite/kernels/register_ref.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,9 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() {
445445
AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL(),
446446
/* min_version = */ 1,
447447
/* max_version = */ 3);
448-
AddBuiltin(BuiltinOperator_SQRT, Register_SQRT());
448+
AddBuiltin(BuiltinOperator_SQRT, Register_SQRT(),
449+
/* min_version = */ 1,
450+
/* max_version = */ 2);
449451
AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT(),
450452
/* min_version = */ 1,
451453
/* max_version = */ 3);

tensorflow/lite/tools/versioning/op_version.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,7 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) {
10451045
case BuiltinOperator_EXP:
10461046
case BuiltinOperator_LOG:
10471047
case BuiltinOperator_REDUCE_PROD:
1048+
case BuiltinOperator_SQRT:
10481049
if (op_sig.inputs.at(0).type == kTfLiteInt8 ||
10491050
op_sig.inputs.at(0).type == kTfLiteInt16) {
10501051
return 2;

tensorflow/lite/tools/versioning/op_version_test.cc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1438,4 +1438,17 @@ TEST(OpVersionTest, VersioningDynamicUpdateSliceTest) {
14381438
std::vector<TfLiteType>{kTfLiteInt16, kTfLiteInt16, kTfLiteInt32});
14391439
EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 4);
14401440
}
1441+
1442+
TEST(OpVersionTest, VersioningSqrtTest) {
1443+
OpSignature fake_op_sig = {};
1444+
fake_op_sig.op = BuiltinOperator_SQRT;
1445+
fake_op_sig.inputs = CreateOpSignatureTensorSpecs(kTfLiteFloat32);
1446+
EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 1);
1447+
1448+
fake_op_sig.inputs = CreateOpSignatureTensorSpecs(kTfLiteInt8);
1449+
EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 2);
1450+
1451+
fake_op_sig.inputs = CreateOpSignatureTensorSpecs(kTfLiteInt16);
1452+
EXPECT_EQ(GetBuiltinOperatorVersion(fake_op_sig), 2);
1453+
}
14411454
} // namespace tflite

tensorflow/lite/tools/versioning/runtime_version.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code,
387387
{{BuiltinOperator_LOG, 1}, "1.14.0"},
388388
{{BuiltinOperator_LOG, 2}, "2.15.0"},
389389
{{BuiltinOperator_SQRT, 1}, "1.10.0"},
390+
{{BuiltinOperator_SQRT, 2}, "2.21.0"},
390391
{{BuiltinOperator_RSQRT, 1}, "1.10.0"},
391392
{{BuiltinOperator_RSQRT, 2}, "2.5.0"},
392393
{{BuiltinOperator_RSQRT, 3}, "2.15.0"},

0 commit comments

Comments (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy