tensorflow · copybara-service · Jul 23, 2025
diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
@@ -118,6 +118,26 @@ cc_library(
         "//tensorflow/core:__pkg__",
         "//tensorflow/python:__subpackages__",
     ],
+    deps = [
+        ":c_api_headers",
+        ":tf_buffer_hdrs",
+        ":tf_datatype_hdrs",
+        ":tf_status_headers",
+        ":tf_tensor_hdrs",
+        "//tensorflow/core:core_cpu_base",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_lite",
+        "//tensorflow/core:op_gen_lib",
+        "//tensorflow/core:portable_gif_internal",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/framework:tensor",
+        "//tensorflow/core/framework:tensor_shape",
+        "//tensorflow/core/platform:casts",
+        "//tensorflow/core/platform:protobuf",
+        "//tensorflow/core/platform:status",
+        "@com_google_absl//absl/status",
+        "@local_xla//xla/tsl/platform:status",
+    ],
 )
 
 cc_library(
@@ -334,6 +354,7 @@ cc_library(
         ":c_api_macros",
         "//tensorflow/core/platform:logging",
         "//tensorflow/core/platform:stringprintf",
+        "@com_google_absl//absl/log",
     ],
 )
 
@@ -492,6 +513,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":c_api_macros_hdrs",
+        "//tensorflow/core:protos_all_cc",
     ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:portable_tensorflow_lib_lite",  # TODO(annarev): exclude runtime srcs
@@ -535,6 +557,8 @@ cc_library(
         ":tf_status_helper",
         ":tf_tensor_internal",
         "//tensorflow/core/platform:status",
+        "@com_google_absl//absl/log:check",
+        "@com_google_absl//absl/status",
     ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:portable_tensorflow_lib_lite",  # TODO(annarev): exclude runtime srcs
@@ -602,6 +626,8 @@ cc_library(
         "//tensorflow/core/platform:platform_port",
         "//tensorflow/core/platform:protobuf",
         "//tensorflow/core/platform:status",
+        "//third_party/protobuf:protobuf_lite",
+        "@com_google_absl//absl/status",
     ],
 )
 
@@ -688,6 +714,9 @@ tf_cuda_library(
     visibility = ["//visibility:public"],
     deps = [
         ":tf_status_headers",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/strings:cord",
+        "@com_google_absl//absl/strings:string_view",
         "@local_xla//xla/tsl/c:tsl_status_helper",
         "@local_xla//xla/tsl/platform:status",
     ],
@@ -699,6 +728,8 @@ tf_cc_test(
     deps = [
         ":tf_status",
         ":tf_status_helper",
+        "@com_google_absl//absl/status",
+        "@com_google_absl//absl/strings:cord",
         "@com_google_googletest//:gtest_main",
         "@local_xla//xla/tsl/platform:errors",
         "@local_xla//xla/tsl/platform:status",
@@ -841,10 +872,13 @@ tf_cuda_library(
         ":tf_tensor_internal",
         "//tensorflow/core:framework",
         "//tensorflow/core:framework_internal_impl",
+        "//tensorflow/core:protos_all_cc",
         "//tensorflow/core/lib/gtl:cleanup",
         "//tensorflow/core/platform:errors",
         "//tensorflow/core/platform:mutex",
         "//tensorflow/core/platform:refcount",
+        "@com_google_absl//absl/log",
+        "@com_google_absl//absl/status",
     ] + if_not_mobile([
         "//tensorflow/core/kernels:tensor_list",
         "//tensorflow/core/kernels:tensor_list_util",
@@ -869,6 +903,8 @@ tf_cuda_library(
         ":c_api_macros_hdrs",
         ":tf_datatype",
         ":tf_status_helper",
+        "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/status",
     ] + select({
         "//tensorflow:android": [
             "//tensorflow/core:portable_tensorflow_lib_lite",
@@ -1040,6 +1076,8 @@ tf_cc_test(
         "//tensorflow/core:lib",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
+        "//tensorflow/core/framework:graph_proto_cc",
+        "@com_google_absl//absl/log:check",
     ],
 )
 
@@ -1122,6 +1160,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "@com_google_absl//absl/log:check",
         "@com_google_absl//absl/strings",
     ],
 )
@@ -1140,6 +1179,7 @@ tf_cuda_library(
     deps = [
         ":c_api",
         ":c_api_internal",
+        "@com_google_absl//absl/log:check",
         "//tensorflow/core:protos_all_cc",
         # TODO(b/74620627): remove when _USE_C_SHAPES is removed
     ],

diff --git a/tensorflow/c/kernels_experimental.cc b/tensorflow/c/kernels_experimental.cc
@@ -16,11 +16,17 @@ limitations under the License.
 #include "tensorflow/c/kernels_experimental.h"
 
 #include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
 #include <memory>
 #include <optional>
 #include <string>
 #include <utility>
+#include <vector>
 
+#include "absl/log/log.h"
+#include "absl/status/status.h"
 #include "tensorflow/c/tf_status_helper.h"
 #include "tensorflow/c/tf_status_internal.h"
 #include "tensorflow/c/tf_tensor_internal.h"
@@ -29,6 +35,7 @@ limitations under the License.
 #include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/framework/resource_var.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/framework/variant.h"
 #include "tensorflow/core/lib/gtl/cleanup.h"
 

diff --git a/tensorflow/c/logging.cc b/tensorflow/c/logging.cc
@@ -14,6 +14,9 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/c/logging.h"
 
+#include <cstdarg>
+
+#include "absl/log/log.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/stringprintf.h"
 

diff --git a/tensorflow/c/ops.cc b/tensorflow/c/ops.cc
@@ -15,11 +15,17 @@ limitations under the License.
 
 #include "tensorflow/c/ops.h"
 
+#include <cstddef>
+#include <cstdint>
+
+#include "absl/status/status.h"
 #include "tensorflow/c/tf_status_helper.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_def_builder.h"
 #include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/framework/types.pb.h"
 
 using ::tensorflow::DataType;
 using ::tensorflow::OpDef;

diff --git a/tensorflow/c/ops_test.cc b/tensorflow/c/ops_test.cc
@@ -15,10 +15,15 @@ limitations under the License.
 
 #include "tensorflow/c/ops.h"
 
-#include "absl/strings/str_cat.h"
+#include <cstdint>
+#include <initializer_list>
+#include <string>
+
+#include "absl/log/check.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/op_def_builder.h"
 #include "tensorflow/core/framework/shape_inference_testutil.h"

diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc
@@ -15,8 +15,10 @@ limitations under the License.
 
 #include "tensorflow/c/python_api.h"
 
+#include <cstddef>
 #include <string>
 
+#include "absl/log/check.h"
 #include "tensorflow/c/c_api_internal.h"
 #include "tensorflow/core/framework/cpp_shape_inference.pb.h"
 #include "tensorflow/core/framework/full_type.pb.h"

diff --git a/tensorflow/c/tf_buffer.cc b/tensorflow/c/tf_buffer.cc
@@ -19,6 +19,8 @@ limitations under the License.
 #include <cstdint>
 #include <cstring>
 
+#include "absl/status/status.h"
+#include "third_party/protobuf/message_lite.h"
 #include "tensorflow/core/platform/errors.h"
 #include "tensorflow/core/platform/mem.h"
 #include "tensorflow/core/platform/protobuf.h"  // IWYU pragma: keep

diff --git a/tensorflow/c/tf_datatype.cc b/tensorflow/c/tf_datatype.cc
@@ -15,7 +15,10 @@ limitations under the License.
 
 #include "tensorflow/c/tf_datatype.h"
 
+#include <cstddef>
+
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
 
 size_t TF_DataTypeSize(TF_DataType dt) {
   return static_cast<size_t>(

diff --git a/tensorflow/c/tf_status_helper.cc b/tensorflow/c/tf_status_helper.cc
@@ -17,6 +17,9 @@ limitations under the License.
 
 #include <string>
 
+#include "absl/status/status.h"
+#include "absl/strings/cord.h"
+#include "absl/strings/string_view.h"
 #include "tensorflow/c/tf_status.h"
 #include "xla/tsl/c/tsl_status_helper.h"
 

diff --git a/tensorflow/c/tf_status_helper.h b/tensorflow/c/tf_status_helper.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include <memory>
 #include <utility>
 
+#include "absl/status/status.h"
 #include "tensorflow/c/tf_status.h"
 #include "xla/tsl/platform/status.h"
 

diff --git a/tensorflow/c/tf_status_helper_test.cc b/tensorflow/c/tf_status_helper_test.cc
@@ -15,6 +15,10 @@ limitations under the License.
 
 #include "tensorflow/c/tf_status_helper.h"
 
+#include <string>
+
+#include "absl/status/status.h"
+#include "absl/strings/cord.h"
 #include "xla/tsl/platform/errors.h"
 #include "xla/tsl/platform/test.h"
 

diff --git a/tensorflow/c/tf_tensor.cc b/tensorflow/c/tf_tensor.cc
@@ -15,10 +15,16 @@ limitations under the License.
 
 #include "tensorflow/c/tf_tensor.h"
 
-#include <memory>
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <string>
 #include <utility>
 #include <vector>
 
+#include "absl/log/check.h"
+#include "absl/status/status.h"
 #include "tensorflow/c/tf_status.h"
 #include "tensorflow/c/tf_status_helper.h"
 #include "tensorflow/c/tf_tensor_internal.h"

diff --git a/tensorflow/c/tf_tstring.cc b/tensorflow/c/tf_tstring.cc
@@ -15,6 +15,8 @@ limitations under the License.
 
 #include "tensorflow/c/tf_tstring.h"
 
+#include <cstddef>
+
 #include "tensorflow/core/platform/ctstring_internal.h"
 
 void TF_StringInit(TF_TString *tstr) { TF_TString_Init(tstr); }

diff --git a/tensorflow/c/while_loop_test.cc b/tensorflow/c/while_loop_test.cc
@@ -13,10 +13,16 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <cstdint>
+#include <initializer_list>
 #include <memory>
+#include <utility>
+#include <vector>
 
+#include "absl/log/check.h"
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/c_test_util.h"
+#include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/strcat.h"
 #include "tensorflow/core/platform/test.h"

diff --git a/tensorflow/compiler/jit/device_compilation_cache.h b/tensorflow/compiler/jit/device_compilation_cache.h
@@ -114,9 +114,7 @@ class DeviceCompilationCache {
           }
 
           const mutex_lock entry_lock(entry->mu);
-          if (entry->compilation_result != nullptr) {
-            entry->compilation_result->computation.reset();
-          }
+          entry->compilation_result->computation.reset();
 
           return false;
         });

diff --git a/tensorflow/compiler/jit/device_compiler.h b/tensorflow/compiler/jit/device_compiler.h
@@ -382,9 +382,6 @@ DeviceCompiler<ExecutableType, ClientType>::CompileStrict(
   cache_->Store(sig, cache_value.compile_state, cache_value.compilation_status,
                 std::move(out_compilation_result), std::move(out_executable));
 
-  // Finalize the cache to release the XlaComputation after it was compiled.
-  cache_->Finalize();
-
   const uint64 compile_end_us = env->NowMicros();
   const uint64 compile_time_us = compile_end_us - compile_start_us;
 

diff --git a/third_party/xla/third_party/triton/temporary/convert_layout_heuristic.patch b/third_party/xla/third_party/triton/temporary/convert_layout_heuristic.patch
@@ -0,0 +1,28 @@
+This change prevents convert_layout propagation of slice layouts across
+broadcast/expand_dims.
+
+It is generally a good idea, but it is especially important when running
+triton-xla-squeeze-dims, which removes expand_dims ops. The expand_dims ops
+remove the slice layouts, so we need to prevent them from propagating
+further across broadcast ops which can cause large tensor materialization.
+
+See also b/422133176.
+
+--- a/lib/Dialect/TritonGPU/Transforms/RemoveLayoutConversions.cpp	2025-05-20 08:08:14.000000000 -0700
++++ b/lib/Dialect/TritonGPU/Transforms/RemoveLayoutConversions.cpp	2025-07-22 04:36:43.000000000 -0700
+@@ -300,6 +300,15 @@
+         continue;
+       }
+     }
++    // Heuristic: don't propagate slice layouts across broadcasts.
++    // This can cause massive register pressure. It's better to convert to
++    // blocked before the broadcast.
++    if (isa<BroadcastOp>(user) &&
++        llvm::any_of(info.encodings, [](Attribute encoding) {
++          return llvm::isa_and_nonnull<SliceEncodingAttr>(encoding);
++        })) {
++      continue;
++    }
+     if (user->hasTrait<OpTrait::SameOperandsAndResultEncoding>() ||
+         user->hasTrait<OpTrait::Elementwise>() ||
+         isa<ReduceOp, ExpandDimsOp, ReshapeOp, TransOp, JoinOp, SplitOp,
diff --git a/third_party/xla/third_party/triton/temporary/series.bzl b/third_party/xla/third_party/triton/temporary/series.bzl
@@ -15,5 +15,6 @@ those to this list.
 
 temporary_patch_list = [
     "//third_party/triton:temporary/mem_sync_scope_agent_to_device.patch",
+    "//third_party/triton:temporary/convert_layout_heuristic.patch",
     # Add new patches just above this line
 ]