
Commit 9ebe7e3

Automated Code Change
PiperOrigin-RevId: 785633377
1 parent f015a18 commit 9ebe7e3

File tree

20 files changed: +91 −14 lines

tensorflow/compiler/jit/BUILD

Lines changed: 2 additions & 0 deletions
@@ -355,6 +355,7 @@ cc_library(
         "//tensorflow/core/tpu:tpu_defs",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/base",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/log",
         "@com_google_absl//absl/log:check",
         "@com_google_absl//absl/memory",
@@ -641,6 +642,7 @@ cc_library(
         "//tensorflow/core/tfrt/common:async_value_tensor",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/cleanup",
+        "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/status",
         "@com_google_absl//absl/types:span",

tensorflow/compiler/jit/kernels/xla_ops.cc

Lines changed: 2 additions & 2 deletions
@@ -604,7 +604,7 @@ void XlaLocalLaunchBase::ComputeAsync(OpKernelContext* ctx, DoneCallback done) {
                        done);
   OP_REQUIRES_OK_ASYNC(ctx, LockVariables(absl::MakeSpan(variable_infos)),
                        done);
-  std::map<int, const Tensor*> resource_var_ptrs;
+  absl::flat_hash_map<int, const Tensor*> resource_var_ptrs;
   for (int i = 0; i < resources.size(); i++) {
     resource_var_ptrs[resources[i]] = variable_infos[i].var()->tensor();
   }
@@ -928,7 +928,7 @@ void XlaRunOp::Compute(OpKernelContext* ctx) {
   const xla::HloInputOutputAliasConfig& input_output_alias =
       closure.executable()->executable()->module().input_output_alias_config();
   absl::StatusOr<std::vector<xla::ExecutionInput>> execution_inputs;
-  std::map<int, const Tensor*> snapshot_ptrs;
+  absl::flat_hash_map<int, const Tensor*> snapshot_ptrs;
   {
     tsl::profiler::TraceMe hlo_module_activity(
         [&] {
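
The hunks above are the core of this commit: point-lookup maps keyed by input index move from std::map to absl::flat_hash_map. The sketch below is illustrative only (plain `const char*` values stand in for `const Tensor*`); it shows that the swap keeps the same insert/find surface while trading ordered iteration for O(1) average-case lookups.

// Illustrative sketch, not TensorFlow code: `const char*` stands in for
// `const Tensor*`. Build against Abseil.
#include <cstdio>

#include "absl/container/flat_hash_map.h"

int main() {
  // Mirrors `resource_var_ptrs` above: keyed by resource-variable index.
  absl::flat_hash_map<int, const char*> resource_var_ptrs;
  resource_var_ptrs[0] = "var0";
  resource_var_ptrs.emplace(3, "var3");

  // Point lookup, the only access pattern in the hunks above.
  auto it = resource_var_ptrs.find(3);
  if (it != resource_var_ptrs.end()) std::printf("found %s\n", it->second);

  // Caveat of the swap: iteration order is unspecified, unlike std::map,
  // so the migration is safe only where no caller relies on sorted keys.
  return 0;
}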

tensorflow/compiler/jit/xla_compile_on_demand_op.cc

Lines changed: 2 additions & 1 deletion
@@ -23,6 +23,7 @@ limitations under the License.
 #include <utility>
 #include <vector>

+#include "absl/container/flat_hash_map.h"
 #include "absl/log/check.h"
 #include "absl/log/log.h"
 #include "absl/memory/memory.h"
@@ -130,7 +131,7 @@ absl::Status XlaCompileOnDemandOp::Run(
           ? platform_info_.xla_device_metadata()->UseMultipleStreams()
           : false);

-  std::map<int, const Tensor*> snapshot_ptrs;
+  absl::flat_hash_map<int, const Tensor*> snapshot_ptrs;
   for (auto& p : variable_args) {
     snapshot_ptrs.emplace(p.first,
                           p.second.has_value() ? &p.second.value() : nullptr);

tensorflow/compiler/jit/xla_launch_util.cc

Lines changed: 10 additions & 8 deletions
@@ -24,6 +24,7 @@ limitations under the License.

 #include "absl/algorithm/container.h"
 #include "absl/cleanup/cleanup.h"
+#include "absl/container/flat_hash_map.h"
 #include "absl/container/flat_hash_set.h"
 #include "absl/status/status.h"
 #include "absl/types/span.h"
@@ -127,7 +128,7 @@ XlaComputationLaunchContext::XlaComputationLaunchContext(

 // Fills in `execution_input` with `buffer` for `index`.
 static void PopulateExecutionInputBuffer(xla::ExecutionInput& execution_input,
-                                         xla::ShapeIndex index,
+                                         const xla::ShapeIndex& index,
                                          se::DeviceMemoryBase buffer,
                                          bool donate_buffer, int device_ordinal,
                                          se::DeviceMemoryAllocator* allocator) {
@@ -149,12 +150,14 @@ absl::StatusOr<std::vector<xla::ExecutionInput>>
 XlaComputationLaunchContext::PopulateInputs(
     OpKernelContext* ctx,
     const XlaCompiler::CompilationResult* compilation_result,
-    const std::map<int, const Tensor*>& resource_vars,
+    const absl::flat_hash_map<int, const Tensor*>& resource_vars,
     int missing_ctx_input_prefix,
     const xla::HloInputOutputAliasConfig& input_output_alias) {
   std::vector<xla::ExecutionInput> arguments;
   arguments.reserve(compilation_result->xla_input_shapes.size());

+  xla::ShapeIndex root_index = {};
+
   for (int i = 0; i < compilation_result->xla_input_shapes.size(); ++i) {
     int arg_num = compilation_result->input_mapping[i];
     CHECK_GE(arg_num, missing_ctx_input_prefix);
@@ -176,9 +179,8 @@ XlaComputationLaunchContext::PopulateInputs(
             ? resource_var_it->second
             : &(ctx->input(arg_num - missing_ctx_input_prefix));
     CHECK(t);
-    bool donate_buffer =
-        t->RefCountIsOne() && is_updated_resource_variable &&
-        input_output_alias.ParameterHasAlias(i, xla::ShapeIndex{});
+    bool donate_buffer = t->RefCountIsOne() && is_updated_resource_variable &&
+                         input_output_alias.ParameterHasAlias(i, root_index);
     VLOG(3) << "Processing input: " << i
             << "; is_resource_variable=" << is_resource_variable
             << "; is_updated_resource_variable=" << is_updated_resource_variable
@@ -196,7 +198,7 @@ XlaComputationLaunchContext::PopulateInputs(
       arguments.emplace_back(&device_shape);
       xla::ExecutionInput& execution_input = arguments.back();
       se::DeviceMemoryBase dmem = XlaTensor::DeviceMemoryFromTensor(*t);
-      PopulateExecutionInputBuffer(execution_input, xla::ShapeIndex{}, dmem,
+      PopulateExecutionInputBuffer(execution_input, root_index, dmem,
                                    donate_buffer, device_ordinal_,
                                    xla_allocator_);
     }
@@ -222,7 +224,7 @@ static absl::StatusOr<Tensor> GetOrCreateTensorForOutput(
     int missing_ctx_input_prefix,
     const xla::HloInputOutputAliasConfig& input_output_alias,
     absl::Span<const int> input_mapping,
-    const std::map<int, const Tensor*>& resource_vars_snapshots,
+    const absl::flat_hash_map<int, const Tensor*>& resource_vars_snapshots,
     DataType output_dtype, const TensorShape& output_shape,
     Allocator* output_allocator, bool allocate_xla_tensors, se::Stream* stream,
     bool use_multiple_streams, std::shared_ptr<se::Event> definition_event) {
@@ -359,7 +361,7 @@ absl::Status XlaComputationLaunchContext::PopulateOutputs(
     ScopedShapedBuffer output, int missing_ctx_input_prefix,
     absl::Span<VariableInfo> variable_infos,
     const xla::HloInputOutputAliasConfig& input_output_alias,
-    const std::map<int, const Tensor*>& resource_vars) {
+    const absl::flat_hash_map<int, const Tensor*>& resource_vars) {
   se::Stream* stream =
       ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr;
   Allocator* allocator = ctx->device()->GetAllocator({});
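
Two smaller cleanups ride along in this file: PopulateExecutionInputBuffer now takes its ShapeIndex by const reference, and a single root_index is hoisted out of the input loop instead of constructing an xla::ShapeIndex{} temporary at each use. Below is a minimal sketch of the idea, using a hypothetical stand-in type since xla::ShapeIndex stores its digits in a vector-like container and is therefore not free to copy.

// Sketch only: `IndexLike` is a hypothetical stand-in for xla::ShapeIndex.
#include <cstdint>
#include <vector>

struct IndexLike {
  std::vector<int64_t> digits;  // empty == root of the shape tree
};

// Before the change, pass-by-value copied the vector on every call; with a
// const reference, the hoisted instance is simply reused.
void PopulateBuffer(const IndexLike& index) { (void)index; }

int main() {
  IndexLike root_index = {};     // constructed once, as in the patch
  for (int i = 0; i < 8; ++i) {
    PopulateBuffer(root_index);  // no per-iteration temporaries
  }
  return 0;
}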

tensorflow/compiler/jit/xla_launch_util.h

Lines changed: 3 additions & 2 deletions
@@ -23,6 +23,7 @@ limitations under the License.
 #include <set>
 #include <vector>

+#include "absl/container/flat_hash_map.h"
 #include "tensorflow/compiler/jit/variable_info.h"
 #include "tensorflow/compiler/jit/xla_tensor.h"
 #include "tensorflow/compiler/tf2xla/xla_compiler.h"
@@ -188,7 +189,7 @@ class XlaComputationLaunchContext {
   absl::StatusOr<std::vector<xla::ExecutionInput>> PopulateInputs(
       OpKernelContext* ctx,
       const XlaCompiler::CompilationResult* compilation_result,
-      const std::map<int, const Tensor*>& resource_vars,
+      const absl::flat_hash_map<int, const Tensor*>& resource_vars,
       int missing_ctx_input_prefix,
       const xla::HloInputOutputAliasConfig& input_output_alias);

@@ -208,7 +209,7 @@ class XlaComputationLaunchContext {
       xla::ScopedShapedBuffer output, int missing_ctx_input_prefix,
       absl::Span<VariableInfo> variable_infos,
       const xla::HloInputOutputAliasConfig& input_output_alias,
-      const std::map<int, const Tensor*>& resource_vars);
+      const absl::flat_hash_map<int, const Tensor*>& resource_vars);

  private:
   xla::LocalClient* client_;

tensorflow/lite/java/jni/BUILD

Lines changed: 0 additions & 1 deletion
@@ -2,7 +2,6 @@ load("//tensorflow:tensorflow.default.bzl", "get_compatible_with_portable")

 package(
     # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"],
-    default_visibility = ["//tensorflow/lite:__subpackages__"],
     licenses = ["notice"],
 )

third_party/xla/third_party/stablehlo/temporary.patch

Lines changed: 31 additions & 0 deletions
@@ -102,9 +102,40 @@ diff --ruN a/stablehlo/stablehlo/dialect/AssemblyFormat.cpp b/stablehlo/stablehl
    p.printRegion(cond, /*printEntryBlockArgs=*/false);
    p << " do ";
    p.printRegion(body, /*printEntryBlockArgs=*/false);
+diff --ruN a/stablehlo/stablehlo/dialect/StablehloOps.cpp b/stablehlo/stablehlo/dialect/StablehloOps.cpp
+--- stablehlo/stablehlo/dialect/StablehloOps.cpp
++++ stablehlo/stablehlo/dialect/StablehloOps.cpp
+@@ -2201,14 +2201,14 @@
+   locs.reserve(numValues);
+   for (auto i : inputs) {
+     auto iType = cast<ShapedType>(i.getType());
+-    blockArgTypes.push_back(iType.cloneWith(
+-        llvm::ArrayRef<int64_t>(std::nullopt), iType.getElementType()));
++    blockArgTypes.push_back(
++        iType.cloneWith(llvm::ArrayRef<int64_t>(), iType.getElementType()));
+     locs.push_back(i.getLoc());
+   }
+   for (auto i : init_values) {
+     auto iType = cast<ShapedType>(i.getType());
+-    blockArgTypes.push_back(iType.cloneWith(
+-        llvm::ArrayRef<int64_t>(std::nullopt), iType.getElementType()));
++    blockArgTypes.push_back(
++        iType.cloneWith(llvm::ArrayRef<int64_t>(), iType.getElementType()));
+     locs.push_back(i.getLoc());
+   }
+
 diff --ruN a/stablehlo/stablehlo/dialect/TypeInference.cpp b/stablehlo/stablehlo/dialect/TypeInference.cpp
 --- stablehlo/stablehlo/dialect/TypeInference.cpp
 +++ stablehlo/stablehlo/dialect/TypeInference.cpp
+@@ -1147,7 +1147,7 @@
+       *paddingOrErr,
+       /*lhsDilation=*/baseDilations.value_or(SmallVector<int64_t, 0>{}),
+       /*rhsDilation=*/windowDilations.value_or(SmallVector<int64_t, 0>{}),
+-      /*windowReversal=*/std::nullopt, location);
++      /*windowReversal=*/{}, location);
+   if (failed(windowOrErr)) return failure();
+
+   windowDims.append(windowDimensions.begin(), windowDimensions.end());
 @@ -2248,6 +2248,22 @@
    return success();
  }
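
The patch hunks added here replace llvm::ArrayRef<int64_t>(std::nullopt) with a default-constructed ArrayRef; the std::nullopt overload was deprecated in recent LLVM, and both spellings denote an empty (zero-element) ref. A quick sketch, assuming LLVM headers are on the include path:

// Sketch assuming LLVM headers are available; both forms below denote an
// empty ArrayRef, but the std::nullopt overload is deprecated/removed.
#include <cstdint>

#include "llvm/ADT/ArrayRef.h"

int main() {
  llvm::ArrayRef<int64_t> empty;  // replacement: default-constructed, empty
  // llvm::ArrayRef<int64_t> old(std::nullopt);  // old spelling, now gone
  return empty.empty() ? 0 : 1;   // e.g. a rank-0 (scalar) shape: no dims
}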

third_party/xla/xla/backends/cpu/nanort/ifrt_client.cc

Lines changed: 4 additions & 0 deletions
@@ -120,6 +120,8 @@ class NanoValue : public llvm::RTTIExtends<Self, Base> {
   // Called by subclasses to get access to client() without having to cast.
   NanoIfrtClient* nano_client() const { return client_; }

+  ifrt::UserContextRef user_context() const override { return {}; }
+
   // All nano values are immediately ready.
   ifrt::Future<> GetReadyFuture() const override { return Ready(); }

@@ -861,6 +863,8 @@ class NanoExecutable final
     return absl::UnimplementedError("Serialize is not implemented.");
   }

+  ifrt::UserContextRef user_context() const override { return {}; }
+
   ifrt::Future<> GetReadyFuture() const override { return Ready(); }

   int num_devices() const override { return 1; }

third_party/xla/xla/python/ifrt/executable.h

Lines changed: 6 additions & 0 deletions
@@ -38,6 +38,7 @@ limitations under the License.
 #include "xla/python/ifrt/future.h"
 #include "xla/python/ifrt/serdes_default_version_accessor.h"
 #include "xla/python/ifrt/serdes_version.h"
+#include "xla/python/ifrt/user_context.h"
 #include "xla/xla_data.pb.h"

 namespace xla {
@@ -161,6 +162,11 @@ class LoadedExecutable
   // serialized executable is implementation-specific.
   virtual absl::StatusOr<std::string> Serialize() const = 0;

+  // Returns the user context associated with the creation of this executable.
+  // May be `nullptr` if the user context is unset or the runtime does not
+  // support it.
+  virtual UserContextRef user_context() const = 0;
+
   // Returns a future that becomes ready when the executable is ready to be
   // used for execution.
   //
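
Because user_context() is added as a pure virtual on LoadedExecutable, every concrete implementation must override it, which is why the NanoRT stubs above and the mock below change in the same commit. A stand-alone sketch of that constraint, with hypothetical stand-in types (std::string in place of ifrt::UserContextRef):

// Hypothetical stand-ins to show the shape of the API change, not the real
// IFRT types.
#include <string>

using UserContextRefLike = std::string;

class LoadedExecutableLike {
 public:
  virtual ~LoadedExecutableLike() = default;
  // New pure virtual: subclasses won't compile until they override it.
  virtual UserContextRefLike user_context() const = 0;
};

// Runtimes without user-context tracking return an empty ref, matching the
// `return {};` stubs added to the NanoRT client above.
class MinimalExecutable final : public LoadedExecutableLike {
 public:
  UserContextRefLike user_context() const override { return {}; }
};

int main() { return MinimalExecutable().user_context().empty() ? 0 : 1; }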

third_party/xla/xla/python/ifrt/mock.cc

Lines changed: 3 additions & 0 deletions
@@ -60,6 +60,9 @@ using ::testing::_;
 // LINT.IfChange(MockArrayDelegation)
 MockArray::MockArray(xla::ifrt::ArrayRef delegated)
     : delegated_(std::move(delegated)) {
+  ON_CALL(*this, user_context).WillByDefault([this]() {
+    return delegated_->user_context();
+  });
   ON_CALL(*this, GetReadyFuture).WillByDefault([this]() {
     return delegated_->GetReadyFuture();
   });
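
MockArray forwards the new method to the wrapped real array by default, the delegation idiom gMock documents for mocks that wrap a real object. A minimal self-contained sketch of that pattern with hypothetical types (requires GoogleTest/GoogleMock):

// Sketch with hypothetical types, not the real IFRT mock.
#include <memory>
#include <string>
#include <utility>

#include <gmock/gmock.h>

class Array {
 public:
  virtual ~Array() = default;
  virtual std::string user_context() const = 0;
};

class RealArray : public Array {
 public:
  std::string user_context() const override { return "real-context"; }
};

class MockArrayLike : public Array {
 public:
  explicit MockArrayLike(std::shared_ptr<Array> delegated)
      : delegated_(std::move(delegated)) {
    // Same shape as the MockArray constructor change: unless a test sets an
    // expectation, calls fall through to the delegated real object.
    ON_CALL(*this, user_context).WillByDefault([this]() {
      return delegated_->user_context();
    });
  }
  MOCK_METHOD(std::string, user_context, (), (const, override));

 private:
  std::shared_ptr<Array> delegated_;
};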
