diff --git a/.github/workflows/update-nightly.yml b/.github/workflows/update-nightly.yml deleted file mode 100644 index 01b5147d0538f7..00000000000000 --- a/.github/workflows/update-nightly.yml +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -on: - workflow_dispatch: # Allow manual triggers - schedule: - - cron: 0 4 * * * # 4am UTC is 9pm PDT and 8pm PST -name: Set nightly branch to master HEAD -jobs: - master-to-nightly: - runs-on: ubuntu-latest - steps: - - uses: zofrex/mirror-branch@v1 - name: Set nightly branch to master HEAD - with: - target-branch: 'nightly' diff --git a/.zenodo.json b/.zenodo.json new file mode 100644 index 00000000000000..7161180c51ae3e --- /dev/null +++ b/.zenodo.json @@ -0,0 +1,13 @@ +{ + "description": "TensorFlow is an end-to-end open source platform for machine learning. It has a comprehensive, flexible ecosystem of tools, libraries, and community resources that lets researchers push the state-of-the-art in ML and developers easily build and deploy ML-powered applications.", + "license": "Apache-2.0", + "title": "TensorFlow", + "upload_type": "software", + "creators": [ + { + "name": "TensorFlow Developers" + } + ], + "access_right": "open", + "notes": "Specific TensorFlow versions can be found in the \"Versions\" list on the right side of this page.
See the full list of authors on GitHub." } diff --git a/RELEASE.md b/RELEASE.md index 97b4f796f7d66d..88d3b7b2e0e753 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,16 +1,242 @@ +# Release 2.4.4 + +This release introduces several vulnerability fixes: + +* Fixes a code injection issue in `saved_model_cli` ([CVE-2021-41228](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41228)) +* Fixes a vulnerability due to use of an uninitialized value in TensorFlow ([CVE-2021-41225](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41225)) +* Fixes a heap OOB in `FusedBatchNorm` kernels ([CVE-2021-41223](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41223)) +* Fixes an arbitrary memory read in `ImmutableConst` ([CVE-2021-41227](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41227)) +* Fixes a heap OOB in `SparseBinCount` ([CVE-2021-41226](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41226)) +* Fixes a heap OOB in `SparseFillEmptyRows` ([CVE-2021-41224](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41224)) +* Fixes a segfault due to negative splits in `SplitV` ([CVE-2021-41222](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41222)) +* Fixes segfaults and vulnerabilities caused by accesses to invalid memory during shape inference in `Cudnn*` ops ([CVE-2021-41221](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41221)) +* Fixes a null pointer exception when an `Exit` node is not preceded by an `Enter` op ([CVE-2021-41217](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41217)) +* Fixes an integer division by 0 in `tf.raw_ops.AllToAll` ([CVE-2021-41218](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41218)) +* Fixes an undefined behavior via `nullptr` reference binding in sparse matrix multiplication ([CVE-2021-41219](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41219)) +* Fixes a heap buffer overflow in `Transpose` ([CVE-2021-41216](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41216)) +* Prevents deadlocks arising from mutually recursive `tf.function` objects ([CVE-2021-41213](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41213)) +* Fixes a null pointer exception in `DeserializeSparse` ([CVE-2021-41215](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41215)) +* Fixes an undefined behavior arising from reference binding to `nullptr` in `tf.ragged.cross` ([CVE-2021-41214](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41214)) +* Fixes a heap OOB read in `tf.ragged.cross` ([CVE-2021-41212](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41212)) +* Fixes a heap OOB read in all `tf.raw_ops.QuantizeAndDequantizeV*` ops ([CVE-2021-41205](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41205)) +* Fixes an FPE in `ParallelConcat` ([CVE-2021-41207](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41207)) +* Fixes FPE issues in convolutions with zero-size filters ([CVE-2021-41209](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41209)) +* Fixes a heap OOB read in `tf.raw_ops.SparseCountSparseOutput` ([CVE-2021-41210](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41210)) +* Fixes vulnerabilities caused by incomplete validation in boosted trees code ([CVE-2021-41208](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41208)) +* Fixes vulnerabilities caused by incomplete validation of shapes in multiple TF ops ([CVE-2021-41206](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41206)) +* Fixes a segfault produced while copying constant
resource tensor ([CVE-2021-41204](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41204)) +* Fixes a vulnerability caused by uninitialized access in `EinsumHelper::ParseEquation` ([CVE-2021-41201](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41201)) +* Fixes several vulnerabilities and segfaults caused by missing validation during checkpoint loading ([CVE-2021-41203](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41203)) +* Fixes an overflow producing a crash in `tf.range` ([CVE-2021-41202](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41202)) +* Fixes an overflow producing a crash in `tf.image.resize` when the size is large ([CVE-2021-41199](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41199)) +* Fixes an overflow producing a crash in `tf.tile` when the tiling tensor is large ([CVE-2021-41198](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41198)) +* Fixes a vulnerability produced due to incomplete validation in `tf.summary.create_file_writer` ([CVE-2021-41200](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41200)) +* Fixes multiple crashes due to overflow and `CHECK`-fail in ops with large tensor shapes ([CVE-2021-41197](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41197)) +* Fixes a crash in `max_pool3d` when the size argument is 0 or negative ([CVE-2021-41196](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41196)) +* Fixes a crash in `tf.math.segment_*` operations ([CVE-2021-41195](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41195)) +* Updates `curl` to `7.78.0` to handle + [CVE-2021-22922](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22922), + [CVE-2021-22923](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22923), + [CVE-2021-22924](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22924), + [CVE-2021-22925](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22925), + and + [CVE-2021-22926](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22926).
+ +# Release 2.4.3 + +This release introduces several vulnerability fixes: + +* Fixes a heap out of bounds access in sparse reduction operations ([CVE-2021-37635](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37635)) +* Fixes a floating point exception in `SparseDenseCwiseDiv` ([CVE-2021-37636](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37636)) +* Fixes a null pointer dereference in `CompressElement` ([CVE-2021-37637](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37637)) +* Fixes a null pointer dereference in `RaggedTensorToTensor` ([CVE-2021-37638](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37638)) +* Fixes a null pointer dereference and a heap OOB read arising from operations restoring tensors ([CVE-2021-37639](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37639)) +* Fixes an integer division by 0 in sparse reshaping ([CVE-2021-37640](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37640)) +* Fixes a division by 0 in `ResourceScatterDiv` ([CVE-2021-37642](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37642)) +* Fixes a heap OOB in `RaggedGather` ([CVE-2021-37641](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37641)) +* Fixes a `std::abort` raised from `TensorListReserve` ([CVE-2021-37644](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37644)) +* Fixes a null pointer dereference in `MatrixDiagPartOp` ([CVE-2021-37643](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37643)) +* Fixes an integer overflow due to conversion to unsigned ([CVE-2021-37645](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37645)) +* Fixes a bad allocation error in `StringNGrams` caused by integer conversion ([CVE-2021-37646](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37646)) +* Fixes a null pointer dereference in `SparseTensorSliceDataset` ([CVE-2021-37647](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37647)) +* Fixes an incorrect validation of `SaveV2` inputs ([CVE-2021-37648](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37648)) +* Fixes a null pointer dereference in `UncompressElement` ([CVE-2021-37649](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37649)) +* Fixes a segfault and a heap buffer overflow in `{Experimental,}DatasetToTFRecord` ([CVE-2021-37650](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37650)) +* Fixes a heap buffer overflow in `FractionalAvgPoolGrad` ([CVE-2021-37651](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37651)) +* Fixes a use after free in boosted trees creation ([CVE-2021-37652](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37652)) +* Fixes a division by 0 in `ResourceGather` ([CVE-2021-37653](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37653)) +* Fixes a heap OOB and a `CHECK` fail in `ResourceGather` ([CVE-2021-37654](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37654)) +* Fixes a heap OOB in `ResourceScatterUpdate` ([CVE-2021-37655](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37655)) +* Fixes an undefined behavior arising from reference binding to nullptr in `RaggedTensorToSparse` ([CVE-2021-37656](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37656)) +* Fixes an undefined behavior arising from reference binding to nullptr in `MatrixDiagV*` ops ([CVE-2021-37657](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37657)) +* Fixes an undefined behavior arising from reference binding to nullptr in `MatrixSetDiagV*` ops 
([CVE-2021-37658](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37658)) +* Fixes an undefined behavior arising from reference binding to nullptr and heap OOB in binary cwise ops ([CVE-2021-37659](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37659)) +* Fixes a division by 0 in inplace operations ([CVE-2021-37660](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37660)) +* Fixes a crash caused by integer conversion to unsigned ([CVE-2021-37661](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37661)) +* Fixes an undefined behavior arising from reference binding to nullptr in boosted trees ([CVE-2021-37662](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37662)) +* Fixes a heap OOB in boosted trees ([CVE-2021-37664](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37664)) +* Fixes vulnerabilities arising from incomplete validation in `QuantizeV2` ([CVE-2021-37663](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37663)) +* Fixes vulnerabilities arising from incomplete validation in MKL requantization ([CVE-2021-37665](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37665)) +* Fixes an undefined behavior arising from reference binding to nullptr in `RaggedTensorToVariant` ([CVE-2021-37666](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37666)) +* Fixes an undefined behavior arising from reference binding to nullptr in unicode encoding ([CVE-2021-37667](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37667)) +* Fixes an FPE in `tf.raw_ops.UnravelIndex` ([CVE-2021-37668](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37668)) +* Fixes a crash in NMS ops caused by integer conversion to unsigned ([CVE-2021-37669](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37669)) +* Fixes a heap OOB in `UpperBound` and `LowerBound` ([CVE-2021-37670](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37670)) +* Fixes an undefined behavior arising from reference binding to nullptr in map operations ([CVE-2021-37671](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37671)) +* Fixes a heap OOB in `SdcaOptimizerV2` ([CVE-2021-37672](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37672)) +* Fixes a `CHECK`-fail in `MapStage` ([CVE-2021-37673](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37673)) +* Fixes a vulnerability arising from incomplete validation in `MaxPoolGrad` ([CVE-2021-37674](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37674)) +* Fixes an undefined behavior arising from reference binding to nullptr in shape inference ([CVE-2021-37676](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37676)) +* Fixes a division by 0 in most convolution operators ([CVE-2021-37675](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37675)) +* Fixes vulnerabilities arising from missing validation in shape inference for `Dequantize` ([CVE-2021-37677](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37677)) +* Fixes an arbitrary code execution due to YAML deserialization ([CVE-2021-37678](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37678)) +* Fixes a heap OOB in nested `tf.map_fn` with `RaggedTensor`s ([CVE-2021-37679](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37679)) +* Fixes a division by zero in TFLite ([CVE-2021-37680](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37680)) +* Fixes an NPE in TFLite ([CVE-2021-37681](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37681)) +* Fixes a vulnerability arising from use of 
an uninitialized value in TFLite ([CVE-2021-37682](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37682)) +* Fixes an FPE in TFLite division operations ([CVE-2021-37683](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37683)) +* Fixes an FPE in TFLite pooling operations ([CVE-2021-37684](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37684)) +* Fixes an infinite loop in TFLite ([CVE-2021-37686](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37686)) +* Fixes a heap OOB in TFLite ([CVE-2021-37685](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37685)) +* Fixes a heap OOB in TFLite's `Gather*` implementations ([CVE-2021-37687](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37687)) +* Fixes an undefined behavior arising from null pointer dereference in TFLite ([CVE-2021-37688](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37688)) +* Fixes an undefined behavior arising from null pointer dereference in TFLite MLIR optimizations ([CVE-2021-37689](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37689)) +* Fixes an FPE in LSH in TFLite ([CVE-2021-37691](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37691)) +* Fixes a segfault on string tensors with mismatched dimensions, arising in Go code ([CVE-2021-37692](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37692)) +* Fixes a use after free and a potential segfault in shape inference functions ([CVE-2021-37690](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-37690)) +* Updates `curl` to `7.77.0` to handle [CVE-2021-22876](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22876), [CVE-2021-22897](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22897), [CVE-2021-22898](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22898), and [CVE-2021-22901](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22901).
+ +# Release 2.4.2 + +This release introduces several vulnerability fixes: + +* Fixes a heap buffer overflow in `RaggedBinCount` ([CVE-2021-29512](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29512)) +* Fixes a heap out of bounds write in `RaggedBinCount` ([CVE-2021-29514](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29514)) +* Fixes a type confusion during tensor casts which leads to dereferencing null pointers ([CVE-2021-29513](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29513)) +* Fixes a reference binding to null pointer in `MatrixDiag*` ops ([CVE-2021-29515](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29515)) +* Fixes a null pointer dereference via invalid Ragged Tensors ([CVE-2021-29516](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29516)) +* Fixes a division by zero in `Conv3D` ([CVE-2021-29517](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29517)) +* Fixes vulnerabilities where session operations in eager mode lead to null pointer dereferences ([CVE-2021-29518](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29518)) +* Fixes a `CHECK`-fail in `SparseCross` caused by type confusion ([CVE-2021-29519](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29519)) +* Fixes a segfault in `SparseCountSparseOutput` ([CVE-2021-29521](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29521)) +* Fixes a heap buffer overflow in `Conv3DBackprop*` ([CVE-2021-29520](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29520)) +* Fixes a division by 0 in `Conv3DBackprop*` ([CVE-2021-29522](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29522)) +* Fixes a `CHECK`-fail in `AddManySparseToTensorsMap` ([CVE-2021-29523](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29523)) +* Fixes a division by 0 in `Conv2DBackpropFilter` ([CVE-2021-29524](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29524)) +* Fixes a division by 0 in `Conv2DBackpropInput` ([CVE-2021-29525](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29525)) +* Fixes a division by 0 in `Conv2D` ([CVE-2021-29526](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29526)) +* Fixes a division by 0 in `QuantizedConv2D` ([CVE-2021-29527](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29527)) +* Fixes a division by 0 in `QuantizedMul` ([CVE-2021-29528](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29528)) +* Fixes vulnerabilities caused by invalid validation in `SparseMatrixSparseCholesky` ([CVE-2021-29530](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29530)) +* Fixes a heap buffer overflow caused by rounding ([CVE-2021-29529](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29529)) +* Fixes a `CHECK`-fail in `tf.raw_ops.EncodePng` ([CVE-2021-29531](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29531)) +* Fixes a heap out of bounds read in `RaggedCross` ([CVE-2021-29532](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29532)) +* Fixes a `CHECK`-fail in `DrawBoundingBoxes` ([CVE-2021-29533](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29533)) +* Fixes a heap buffer overflow in `QuantizedMul` ([CVE-2021-29535](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29535)) +* Fixes a `CHECK`-fail in `SparseConcat` ([CVE-2021-29534](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29534)) +* Fixes a heap buffer overflow in `QuantizedResizeBilinear` ([CVE-2021-29537](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29537)) +* Fixes a heap buffer 
overflow in `QuantizedReshape` ([CVE-2021-29536](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29536)) +* Fixes a division by zero in `Conv2DBackpropFilter` ([CVE-2021-29538](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29538)) +* Fixes a heap buffer overflow in `Conv2DBackpropFilter` ([CVE-2021-29540](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29540)) +* Fixes a heap buffer overflow in `StringNGrams` ([CVE-2021-29542](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29542)) +* Fixes a null pointer dereference in `StringNGrams` ([CVE-2021-29541](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29541)) +* Fixes a `CHECK`-fail in `QuantizeAndDequantizeV4Grad` ([CVE-2021-29544](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29544)) +* Fixes a `CHECK`-fail in `CTCGreedyDecoder` ([CVE-2021-29543](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29543)) +* Fixes a heap buffer overflow in `SparseTensorToCSRSparseMatrix` ([CVE-2021-29545](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29545)) +* Fixes a division by 0 in `QuantizedBiasAdd` ([CVE-2021-29546](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29546)) +* Fixes a heap out of bounds in `QuantizedBatchNormWithGlobalNormalization` ([CVE-2021-29547](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29547)) +* Fixes a division by 0 in `QuantizedBatchNormWithGlobalNormalization` ([CVE-2021-29548](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29548)) +* Fixes a division by 0 in `QuantizedAdd` ([CVE-2021-29549](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29549)) +* Fixes a division by 0 in `FractionalAvgPool` ([CVE-2021-29550](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29550)) +* Fixes an OOB read in `MatrixTriangularSolve` ([CVE-2021-29551](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29551)) +* Fixes a heap OOB in `QuantizeAndDequantizeV3` ([CVE-2021-29553](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29553)) +* Fixes a `CHECK`-failure in `UnsortedSegmentJoin` ([CVE-2021-29552](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29552)) +* Fixes a division by 0 in `DenseCountSparseOutput` ([CVE-2021-29554](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29554)) +* Fixes a division by 0 in `FusedBatchNorm` ([CVE-2021-29555](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29555)) +* Fixes a division by 0 in `SparseMatMul` ([CVE-2021-29557](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29557)) +* Fixes a division by 0 in `Reverse` ([CVE-2021-29556](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29556)) +* Fixes a heap buffer overflow in `SparseSplit` ([CVE-2021-29558](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29558)) +* Fixes a heap OOB access in unicode ops ([CVE-2021-29559](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29559)) +* Fixes a heap buffer overflow in `RaggedTensorToTensor` ([CVE-2021-29560](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29560)) +* Fixes a `CHECK`-fail in `LoadAndRemapMatrix` ([CVE-2021-29561](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29561)) +* Fixes a `CHECK`-fail in `tf.raw_ops.IRFFT` ([CVE-2021-29562](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29562)) +* Fixes a `CHECK`-fail in `tf.raw_ops.RFFT` ([CVE-2021-29563](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29563)) +* Fixes a null pointer dereference in `EditDistance` 
([CVE-2021-29564](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29564)) +* Fixes a null pointer dereference in `SparseFillEmptyRows` ([CVE-2021-29565](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29565)) +* Fixes a heap OOB access in `Dilation2DBackpropInput` ([CVE-2021-29566](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29566)) +* Fixes a reference binding to null in `ParameterizedTruncatedNormal` ([CVE-2021-29568](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29568)) +* Fixes a set of vulnerabilities caused by lack of validation in `SparseDenseCwiseMul` ([CVE-2021-29567](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29567)) +* Fixes a heap out of bounds read in `MaxPoolGradWithArgmax` ([CVE-2021-29570](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29570)) +* Fixes a heap out of bounds read in `RequantizationRange` ([CVE-2021-29569](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29569)) +* Fixes a memory corruption in `DrawBoundingBoxesV2` ([CVE-2021-29571](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29571)) +* Fixes a reference binding to nullptr in `SdcaOptimizer` ([CVE-2021-29572](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29572)) +* Fixes an overflow and a denial of service in `tf.raw_ops.ReverseSequence` ([CVE-2021-29575](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29575)) +* Fixes a division by 0 in `MaxPoolGradWithArgmax` ([CVE-2021-29573](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29573)) +* Fixes an undefined behavior in `MaxPool3DGradGrad` ([CVE-2021-29574](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29574)) +* Fixes a heap buffer overflow in `MaxPool3DGradGrad` ([CVE-2021-29576](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29576)) +* Fixes a heap buffer overflow in `AvgPool3DGrad` ([CVE-2021-29577](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29577)) +* Fixes an undefined behavior and a `CHECK`-fail in `FractionalMaxPoolGrad` ([CVE-2021-29580](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29580)) +* Fixes a heap buffer overflow in `FractionalAvgPoolGrad` ([CVE-2021-29578](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29578)) +* Fixes a heap buffer overflow in `MaxPoolGrad` ([CVE-2021-29579](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29579)) +* Fixes a segfault in `CTCBeamSearchDecoder` ([CVE-2021-29581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29581)) +* Fixes a heap OOB read in `tf.raw_ops.Dequantize` ([CVE-2021-29582](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29582)) +* Fixes a `CHECK`-fail due to integer overflow ([CVE-2021-29584](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29584)) +* Fixes a heap buffer overflow and undefined behavior in `FusedBatchNorm` ([CVE-2021-29583](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29583)) +* Fixes a division by zero in padding computation in TFLite ([CVE-2021-29585](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29585)) +* Fixes a division by zero in optimized pooling implementations in TFLite ([CVE-2021-29586](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29586)) +* Fixes a division by zero in TFLite's implementation of `SpaceToDepth` ([CVE-2021-29587](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29587)) +* Fixes a division by zero in TFLite's implementation of `GatherNd` ([CVE-2021-29589](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29589)) +* Fixes a division 
by zero in TFLite's implementation of `TransposeConv` ([CVE-2021-29588](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29588)) +* Fixes a heap OOB read in TFLite's implementation of `Minimum` or `Maximum` ([CVE-2021-29590](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29590)) +* Fixes a null pointer dereference in TFLite's `Reshape` operator ([CVE-2021-29592](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29592)) +* Fixes a stack overflow due to a looping TFLite subgraph ([CVE-2021-29591](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29591)) +* Fixes a division by zero in TFLite's implementation of `DepthToSpace` ([CVE-2021-29595](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29595)) +* Fixes a division by zero in TFLite's convolution code ([CVE-2021-29594](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29594)) +* Fixes a division by zero in TFLite's implementation of `EmbeddingLookup` ([CVE-2021-29596](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29596)) +* Fixes a division by zero in TFLite's implementation of `BatchToSpaceNd` ([CVE-2021-29593](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29593)) +* Fixes a division by zero in TFLite's implementation of `SpaceToBatchNd` ([CVE-2021-29597](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29597)) +* Fixes a division by zero in TFLite's implementation of `SVDF` ([CVE-2021-29598](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29598)) +* Fixes a division by zero in TFLite's implementation of `Split` ([CVE-2021-29599](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29599)) +* Fixes a division by zero in TFLite's implementation of `OneHot` ([CVE-2021-29600](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29600)) +* Fixes a division by zero in TFLite's implementation of `DepthwiseConv` ([CVE-2021-29602](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29602)) +* Fixes a division by zero in TFLite's implementation of hashtable lookup ([CVE-2021-29604](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29604)) +* Fixes an integer overflow in TFLite concatenation ([CVE-2021-29601](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29601)) +* Fixes an integer overflow in TFLite memory allocation ([CVE-2021-29605](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29605)) +* Fixes a heap OOB write in TFLite ([CVE-2021-29603](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29603)) +* Fixes a heap OOB read in TFLite ([CVE-2021-29606](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29606)) +* Fixes a heap OOB and null pointer dereference in `RaggedTensorToTensor` ([CVE-2021-29608](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29608)) +* Fixes vulnerabilities caused by incomplete validation in `SparseAdd` ([CVE-2021-29609](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29609)) +* Fixes vulnerabilities caused by incomplete validation in `SparseSparseMinimum` ([CVE-2021-29607](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29607)) +* Fixes vulnerabilities caused by incomplete validation in `SparseReshape` ([CVE-2021-29611](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29611)) +* Fixes vulnerabilities caused by invalid validation in `QuantizeAndDequantizeV2` ([CVE-2021-29610](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29610)) +* Fixes a heap buffer overflow in `BandedTriangularSolve` ([CVE-2021-29612](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29612))
+* Fixes vulnerabilities caused by incomplete validation in `tf.raw_ops.CTCLoss` ([CVE-2021-29613](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29613)) +* Fixes an interpreter crash from vulnerabilities in `tf.io.decode_raw` ([CVE-2021-29614](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29614)) +* Fixes a stack overflow in `ParseAttrValue` with nested tensors ([CVE-2021-29615](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29615)) +* Fixes a null dereference in Grappler's `TrySimplify` ([CVE-2021-29616](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29616)) +* Fixes a crash in `tf.transpose` with complex inputs ([CVE-2021-29618](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29618)) +* Fixes a crash in `tf.strings.substr` due to `CHECK`-fail ([CVE-2021-29617](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29617)) +* Fixes a segfault in `tf.raw_ops.SparseCountSparseOutput` ([CVE-2021-29619](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29619)) +* Fixes a segfault in `tf.raw_ops.ImmutableConst` ([CVE-2021-29539](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29539)) +* Updates `curl` to `7.76.0` to handle [CVE-2020-8169](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8169), [CVE-2020-8177](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8177), [CVE-2020-8231](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8231), [CVE-2020-8284](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8284), [CVE-2020-8285](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8285) and [CVE-2020-8286](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8286). + +# Release 2.4.1 + +This release removes the AVX2 requirement from TF 2.4.0. + # Release 2.4.0 ## Major Features and Improvements * `tf.distribute` introduces experimental support for asynchronous training of models via the [`tf.distribute.experimental.ParameterServerStrategy`](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/ParameterServerStrategy) API. Please see the [tutorial](https://www.tensorflow.org/tutorials/distribute/parameter_server_training) to learn more. -* [`MultiWorkerMirroredStrategy`](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) is now a stable API and is no longer considered experimental. Some of the major improvements involve handling peer failure and many bug fixes. Please check out the detailed tutorial on [Multi-worker training with Keras](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras). +* [`MultiWorkerMirroredStrategy`](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) is now a stable API and is no longer considered experimental. Some of the major improvements involve handling peer failure and many bug fixes. Please check out the detailed tutorial on [Multi-worker training with Keras](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras). -* Introduces experimental support for a new module named [`tf.experimental.numpy`](https://www.tensorflow.org/api_docs/python/tf/experimental/numpy) which is a NumPy-compatible API for writing TF programs. See the [detailed guide](https://www.tensorflow.org/guide/tf_numpy) to learn more. Additional details below. +* Introduces experimental support for a new module named [`tf.experimental.numpy`](https://www.tensorflow.org/api_docs/python/tf/experimental/numpy) which is a NumPy-compatible API for writing TF programs. See the [detailed guide](https://www.tensorflow.org/guide/tf_numpy) to learn more. Additional details below.
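+  A minimal interop sketch, assuming toy values (the `tnp` alias is just a convention):
+
+  ```python
+  import tensorflow as tf
+  import tensorflow.experimental.numpy as tnp
+
+  x = tnp.ones([2, 3])      # a NumPy-style ndarray backed by an immutable tf.Tensor
+  y = tnp.add(x, 1.5)       # NumPy-compatible function
+  z = tf.reduce_sum(y)      # regular TF ops accept these values directly
+  print(y.shape, float(z))  # (2, 3) 15.0
+  ```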
* Adds support for - [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/) on Ampere based GPUs. TensorFloat-32, or TF32 for short, is a math mode for NVIDIA Ampere based GPUs and is enabled by default. - + [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/) on Ampere based GPUs. TensorFloat-32, or TF32 for short, is a math mode for NVIDIA Ampere based GPUs and is enabled by default. + * A major refactoring of the internals of the Keras Functional API has been completed; it should improve the reliability, stability, and performance of constructing Functional models. * Keras mixed precision API [`tf.keras.mixed_precision`](https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision?version=nightly) is no longer experimental and allows the use of 16-bit floating point formats during training, improving performance by up to 3x on GPUs and 60% on TPUs. Please see below for additional details. @@ -24,71 +250,71 @@ ## Breaking Changes * TF Core: - * Certain float32 ops run in lower precsion on Ampere based GPUs, including matmuls and convolutions, due to the use of [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/). Specifically, inputs to such ops are rounded from 23 bits of precision to 10 + * Certain float32 ops run in lower precision on Ampere based GPUs, including matmuls and convolutions, due to the use of [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/). Specifically, inputs to such ops are rounded from 23 bits of precision to 10 bits of precision. This is unlikely to cause issues in practice for deep learning models. In some cases, TensorFloat-32 is also used for complex64 ops. - TensorFloat-32 can be disabled by running `tf.config.experimental.enable_tensor_float_32_execution(False)`. + TensorFloat-32 can be disabled by running `tf.config.experimental.enable_tensor_float_32_execution(False)`. * The byte layout for string tensors across the C-API has been updated to match TF Core/C++; i.e., a contiguous array of `tensorflow::tstring`/`TF_TString`s. - * C-API functions `TF_StringDecode`, `TF_StringEncode`, and `TF_StringEncodedSize` are no longer relevant and have been removed; see `core/platform/ctstring.h` for string access/modification in C. + * C-API functions `TF_StringDecode`, `TF_StringEncode`, and `TF_StringEncodedSize` are no longer relevant and have been removed; see `core/platform/ctstring.h` for string access/modification in C. * `tensorflow.python`, `tensorflow.core` and `tensorflow.compiler` modules are now hidden. These modules are not part of TensorFlow public API. * `tf.raw_ops.Max` and `tf.raw_ops.Min` no longer accept inputs of type `tf.complex64` or `tf.complex128`, because the behavior of these ops is not well defined for complex types. * XLA:CPU and XLA:GPU devices are no longer registered by default. Use `TF_XLA_FLAGS=--tf_xla_enable_xla_devices` if you really need them, but this flag will eventually be removed in subsequent releases. -* `tf.keras`: +* `tf.keras`: * The `steps_per_execution` argument in `model.compile()` is no longer experimental; if you were passing `experimental_steps_per_execution`, rename it to `steps_per_execution` in your code. This argument controls the number of batches to run during each `tf.function` call when calling `model.fit()`. Running multiple batches inside a single `tf.function` call can greatly improve performance on TPUs or small models with a large Python overhead.
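+    A minimal sketch of the renamed argument (the model and data are toy placeholders):
+
+    ```python
+    import tensorflow as tf
+
+    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
+    # Formerly experimental_steps_per_execution: 16 batches per tf.function call.
+    model.compile(optimizer="adam", loss="mse", steps_per_execution=16)
+
+    data = tf.data.Dataset.from_tensor_slices(
+        (tf.random.normal([64, 4]), tf.random.normal([64, 1]))).batch(8)
+    model.fit(data, epochs=2, verbose=0)
+    ```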
* A **major refactoring** of the internals of the Keras Functional API may affect code that is relying on certain internal details: * Code that uses `isinstance(x, tf.Tensor)` instead of `tf.is_tensor` when checking Keras symbolic inputs/outputs should switch to using `tf.is_tensor`. - * Code that is overly dependent on the exact names attached to symbolic tensors (e.g. assumes there will be ":0" at the end of the inputs, treats names as unique identifiers instead of using `tensor.ref()`, etc.) may break. + * Code that is overly dependent on the exact names attached to symbolic tensors (e.g. assumes there will be ":0" at the end of the inputs, treats names as unique identifiers instead of using `tensor.ref()`, etc.) may break. * Code that uses the full path for `get_concrete_function` to trace Keras symbolic inputs directly should switch to building matching `tf.TensorSpec`s directly and tracing the `TensorSpec` objects. - * Code that relies on the exact number and names of the op layers that TensorFlow operations were converted into may have changed. - * Code that uses `tf.map_fn`/`tf.cond`/`tf.while_loop`/control flow as op layers and happens to work before TF 2.4. These will explicitly be unsupported now. Converting these ops to Functional API op layers was unreliable before TF 2.4, and prone to erroring incomprehensibly or being silently buggy. - * Code that directly asserts on a Keras symbolic value in cases where ops like `tf.rank` used to return a static or symbolic value depending on if the input had a fully static shape or not. Now these ops always return symbolic values. + * Code that relies on the exact number and names of the op layers that TensorFlow operations were converted into may have changed. + * Code that uses `tf.map_fn`/`tf.cond`/`tf.while_loop`/control flow as op layers and happens to work before TF 2.4. These will explicitly be unsupported now. Converting these ops to Functional API op layers was unreliable before TF 2.4, and prone to erroring incomprehensibly or being silently buggy. + * Code that directly asserts on a Keras symbolic value in cases where ops like `tf.rank` used to return a static or symbolic value depending on whether the input had a fully static shape. Now these ops always return symbolic values. * Code already susceptible to leaking tensors outside of graphs becomes slightly more likely to do so now. * Code that tries directly getting gradients with respect to symbolic Keras inputs/outputs. Use `GradientTape` on the actual Tensors passed to the already-constructed model instead. * Code that requires very tricky shape manipulation via converted op layers in order to work, where the Keras symbolic shape inference proves insufficient. - * Code that tries manually walking a `tf.keras.Model` layer by layer and assumes layers only ever have one positional argument. This assumption doesn't hold true before TF 2.4 either, but is more likely to cause issues now. + * Code that tries manually walking a `tf.keras.Model` layer by layer and assumes layers only ever have one positional argument. This assumption doesn't hold true before TF 2.4 either, but is more likely to cause issues now. * Code that manually enters `keras.backend.get_graph()` before building a functional model is no longer needed. - * Start enforcing input shape assumptions when calling Functional API Keras models.
This may potentially break some users, in case there is a mismatch between the shape used when creating `Input` objects in a Functional model, and the shape of the data passed to that model. You can fix this mismatch by either calling the model with correctly-shaped data, or by relaxing `Input` shape assumptions (note that you can pass shapes with `None` entries for axes + * Starts enforcing input shape assumptions when calling Functional API Keras models. This may potentially break some users, in case there is a mismatch between the shape used when creating `Input` objects in a Functional model, and the shape of the data passed to that model. You can fix this mismatch by either calling the model with correctly-shaped data, or by relaxing `Input` shape assumptions (note that you can pass shapes with `None` entries for axes that are meant to be dynamic). You can also disable the input checking entirely by setting `model.input_spec = None`. - * Several changes have been made to `tf.keras.mixed_precision.experimental`. Note that it is now recommended to use the non-experimental `tf.keras.mixed_precision` API. + * Several changes have been made to `tf.keras.mixed_precision.experimental`. Note that it is now recommended to use the non-experimental `tf.keras.mixed_precision` API. * `AutoCastVariable.dtype` now refers to the actual variable dtype, not the dtype it will be casted to. * When mixed precision is enabled, `tf.keras.layers.Embedding` now outputs a float16 or bfloat16 tensor instead of a float32 tensor. - * The property `tf.keras.mixed_precision.experimental.LossScaleOptimizer.loss_scale` is now a tensor, not a `LossScale` object. This means to get a loss scale of a `LossScaleOptimizer` as a tensor, you must now call `opt.loss_scale`instead of `opt.loss_scale()`. + * The property `tf.keras.mixed_precision.experimental.LossScaleOptimizer.loss_scale` is now a tensor, not a `LossScale` object. This means to get a loss scale of a `LossScaleOptimizer` as a tensor, you must now call `opt.loss_scale` instead of `opt.loss_scale()` (see the sketch after this list). * The property `should_cast_variables` has been removed from `tf.keras.mixed_precision.experimental.Policy`. - * When passing a `tf.mixed_precision.experimental.DynamicLossScale` to `tf.keras.mixed_precision.experimental.LossScaleOptimizer`, the `DynamicLossScale`'s multiplier must be 2. + * When passing a `tf.mixed_precision.experimental.DynamicLossScale` to `tf.keras.mixed_precision.experimental.LossScaleOptimizer`, the `DynamicLossScale`'s multiplier must be 2. * When passing a `tf.mixed_precision.experimental.DynamicLossScale` to `tf.keras.mixed_precision.experimental.LossScaleOptimizer`, the weights of - the `DynanmicLossScale` are copied into the `LossScaleOptimizer` instead of being reused. This means modifying the weights of the `DynamicLossScale` will no longer affect the weights of the LossScaleOptimizer, and vice versa. + the `DynamicLossScale` are copied into the `LossScaleOptimizer` instead of being reused. This means modifying the weights of the `DynamicLossScale` will no longer affect the weights of the LossScaleOptimizer, and vice versa. * The global policy can no longer be set to a non-floating point policy in `tf.keras.mixed_precision.experimental.set_policy`. - * In `Layer.call`, `AutoCastVariable`s will no longer be casted within `MirroredStrategy.run` or `ReplicaContext.merge_call`. This is because a thread local variable is used to determine whether `AutoCastVariable`s are casted, and those two functions run with a different thread. Note this only applies if one of these two functions is called within `Layer.call`; if one of those two functions calls `Layer.call`, `AutoCastVariable`s will still be casted. + * In `Layer.call`, `AutoCastVariable`s will no longer be casted within `MirroredStrategy.run` or `ReplicaContext.merge_call`. This is because a thread local variable is used to determine whether `AutoCastVariable`s are casted, and those two functions run with a different thread. Note this only applies if one of these two functions is called within `Layer.call`; if one of those two functions calls `Layer.call`, `AutoCastVariable`s will still be casted.
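+    A minimal sketch of the `loss_scale` property change above (the optimizer choice is arbitrary):
+
+    ```python
+    import tensorflow as tf
+
+    opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
+        tf.keras.optimizers.SGD(), loss_scale="dynamic")
+    scale = opt.loss_scale  # now a tensor; previously opt.loss_scale() returned it
+    print(float(scale))
+    ```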
* `tf.data`: - * `tf.data.experimental.service.DispatchServer` now takes a config tuple instead of individual arguments. Usages should be updated to `tf.data.experimental.service.DispatchServer(dispatcher_config)`. - * `tf.data.experimental.service.WorkerServer` now takes a config tuple instead of individual arguments. Usages should be updated to `tf.data.experimental.service.WorkerServer(worker_config)`. - + * `tf.data.experimental.service.DispatchServer` now takes a config tuple instead of individual arguments. Usages should be updated to `tf.data.experimental.service.DispatchServer(dispatcher_config)`. + * `tf.data.experimental.service.WorkerServer` now takes a config tuple instead of individual arguments. Usages should be updated to `tf.data.experimental.service.WorkerServer(worker_config)`. + * `tf.distribute`: * Removes `tf.distribute.Strategy.experimental_make_numpy_dataset`. Please use `tf.data.Dataset.from_tensor_slices` instead. - * Renames `experimental_hints` in `tf.distribute.StrategyExtended.reduce_to`, `tf.distribute.StrategyExtended.batch_reduce_to`, `tf.distribute.ReplicaContext.all_reduce` to `options`. + * Renames `experimental_hints` in `tf.distribute.StrategyExtended.reduce_to`, `tf.distribute.StrategyExtended.batch_reduce_to`, `tf.distribute.ReplicaContext.all_reduce` to `options`. * Renames `tf.distribute.experimental.CollectiveHints` to `tf.distribute.experimental.CommunicationOptions`. * Renames `tf.distribute.experimental.CollectiveCommunication` to `tf.distribute.experimental.CommunicationImplementation`. - * Renames `tf.distribute.Strategy.experimental_distribute_datasets_from_function` to `distribute_datasets_from_function` as it is no longer experimental. + * Renames `tf.distribute.Strategy.experimental_distribute_datasets_from_function` to `distribute_datasets_from_function` as it is no longer experimental. * Removes `tf.distribute.Strategy.experimental_run_v2` method, which was deprecated in TF 2.2. * `tf.lite`: * `tf.quantization.quantize_and_dequantize_v2` has been introduced, which updates the gradient definition for quantization which is outside the range - to be 0. To simulate the V1 the behavior of `tf.quantization.quantize_and_dequantize(...)` use `tf.grad_pass_through(tf.quantization.quantize_and_dequantize_v2)(...)`. + to be 0. To simulate the V1 behavior of `tf.quantization.quantize_and_dequantize(...)`, use `tf.grad_pass_through(tf.quantization.quantize_and_dequantize_v2)(...)`; see the sketch after this list. * Building TensorFlow: * Windows platform builds: TensorFlow on Windows under MSVC is now built with `--copt=/experimental:preprocessor --host_copt=/experimental:preprocessor` (see `.bazelrc` for more details). Builds including TensorFlow may fail with unexpected syntax errors if these flags are absent. See also [this thread on SIG Build](https://groups.google.com/a/tensorflow.org/g/build/c/LbAw8RILvTg/m/ttnuhYU2BgAJ).
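+A minimal sketch of the quantization gradient change above; the input values and `range_given=True` are assumptions for illustration:
+
+```python
+import tensorflow as tf
+
+x = tf.constant([-1.5, -0.4, 0.7, 1.8])  # some values fall outside [-1, 1]
+
+with tf.GradientTape() as tape:
+    tape.watch(x)
+    y = tf.quantization.quantize_and_dequantize_v2(x, -1.0, 1.0, range_given=True)
+print(tape.gradient(y, x))  # zero gradient where x is out of range
+
+# V1-style straight-through behavior: pass gradients through unchanged.
+qdq_v1 = tf.grad_pass_through(
+    lambda t: tf.quantization.quantize_and_dequantize_v2(t, -1.0, 1.0, range_given=True))
+with tf.GradientTape() as tape:
+    tape.watch(x)
+    y = qdq_v1(x)
+print(tape.gradient(y, x))  # all ones
+```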
## Known Caveats * `tf.keras.mixed_precision` - * When using mixed precision, calling `RMSprop.apply_gradients` or `Nadam.apply_gradients` outside a `tf.function` does not work and will raise the AttributeError "Tensor.op is meaningless when eager execution is enabled". See this [issue](https://github.com/tensorflow/tensorflow/issues/45536) for details and a workaround. + * When using mixed precision, calling `RMSprop.apply_gradients` or `Nadam.apply_gradients` outside a `tf.function` does not work and will raise the AttributeError "Tensor.op is meaningless when eager execution is enabled". See this [issue](https://github.com/tensorflow/tensorflow/issues/45536) for details and a workaround. ## Bug Fixes and Other Changes ### TF Core: * Introduces experimental support for a new module named [`tf.experimental.numpy`](https://www.tensorflow.org/api_docs/python/tf/experimental/numpy), which - is a NumPy-compatible API for writing TF programs. This module provides class `ndarray`, which mimics the `ndarray` class in NumPy, and wraps an immutable `tf.Tensor` under the hood. A subset of NumPy functions (e.g. `numpy.add`) are provided. Their inter-operation with TF facilities is seamless in most cases. + is a NumPy-compatible API for writing TF programs. This module provides class `ndarray`, which mimics the `ndarray` class in NumPy, and wraps an immutable `tf.Tensor` under the hood. A subset of NumPy functions (e.g. `numpy.add`) are provided. Their inter-operation with TF facilities is seamless in most cases. See [tensorflow/python/ops/numpy_ops/README.md](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/numpy_ops/README.md) for details of what operations are supported and how they differ from NumPy. * `tf.types.experimental.TensorLike` is a new `Union` type that can be used as type annotation for variables representing a Tensor or a value @@ -97,35 +323,35 @@ tf.reshape truncating inputs such as from int64 to int32. * Adds `tf.sparse.map_values` to apply a function to the `.value`s of `SparseTensor` arguments. * The Python bitwise operators for `Tensor` (`__and__`, `__or__`, `__xor__`, and `__invert__`) now support non-`bool` arguments and apply - the corresponding bitwise ops. `bool` arguments continue to be supported and dispatch to logical ops. This brings them more in line with + the corresponding bitwise ops. `bool` arguments continue to be supported and dispatch to logical ops. This brings them more in line with Python and NumPy behavior. * Adds `tf.SparseTensor.with_values`. This returns a new SparseTensor with the same sparsity pattern, but with new provided values. It is similar to the `with_values` function of `RaggedTensor`. * Adds `StatelessCase` op, and uses it if none of the case branches has stateful ops. * Adds `tf.config.experimental.get_memory_usage` to return the total memory usage of the device. * Adds gradients for `RaggedTensorToVariant` and `RaggedTensorFromVariant`. - * Improve shape inference of nested function calls by supporting constant folding across Arg nodes which makes more static values available to shape inference functions. + * Improves shape inference of nested function calls by supporting constant folding across Arg nodes, which makes more static values available to shape inference functions. * `tf.debugging`: * `tf.debugging.assert_shapes()` now works on `SparseTensor`s (Fixes [#36268](https://github.com/tensorflow/tensorflow/issues/36268)).
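+        A minimal sketch, assuming toy shapes:
+
+        ```python
+        import tensorflow as tf
+
+        st = tf.sparse.SparseTensor(indices=[[0, 0], [1, 2]], values=[1.0, 2.0], dense_shape=[3, 4])
+        x = tf.zeros([3, 7])
+        # SparseTensors can now participate in shape assertions alongside dense tensors.
+        tf.debugging.assert_shapes([(st, ("N", "M")), (x, ("N", "K"))])
+        ```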
* GPU - * Adds Support for [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/) on Ampere based GPUs. - TensorFloat-32, or TF32 for short, is a math mode for NVIDIA Ampere based GPUs which causes certain float32 ops, such as matrix - multiplications and convolutions, to run much faster on Ampere GPUs but with reduced precision. This reduced precision has not been found - to effect convergence quality of deep learning models in practice. TensorFloat-32 is enabled by default, but can be disabled with `tf.config.experimental.enable_tensor_float_32_execution`. + * Adds support for [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/) on Ampere based GPUs. + TensorFloat-32, or TF32 for short, is a math mode for NVIDIA Ampere based GPUs which causes certain float32 ops, such as matrix + multiplications and convolutions, to run much faster on Ampere GPUs but with reduced precision. This reduced precision has not been found + to affect convergence quality of deep learning models in practice. TensorFloat-32 is enabled by default, but can be disabled with `tf.config.experimental.enable_tensor_float_32_execution`. * `tf.math`: * Adds `tf.math.erfcinv`, the inverse to `tf.math.erfc`. * `tf.nn`: * `tf.nn.max_pool2d` now supports explicit padding. * `tf.image`: - * Adds deterministic `tf.image.stateless_random_*` functions for each `tf.image.random_*` function. Added a new op `stateless_sample_distorted_bounding_box` which is a deterministic version of `sample_distorted_bounding_box` op. Given the same seed, these stateless functions/ops produce the same results independent of how many times the function is called, and independent of global seed settings. - * Adds deterministic `tf.image.resize` backprop CUDA kernels for `method=ResizeMethod.BILINEAR` (the default method). Enable by setting the environment variable `TF_DETERMINISTIC_OPS` to `"true"` or `"1"`. + * Adds deterministic `tf.image.stateless_random_*` functions for each `tf.image.random_*` function. Added a new op `stateless_sample_distorted_bounding_box` which is a deterministic version of the `sample_distorted_bounding_box` op. Given the same seed, these stateless functions/ops produce the same results independent of how many times the function is called, and independent of global seed settings. + * Adds deterministic `tf.image.resize` backprop CUDA kernels for `method=ResizeMethod.BILINEAR` (the default method). Enable by setting the environment variable `TF_DETERMINISTIC_OPS` to `"true"` or `"1"`. * `tf.print`: * Fixes a bug in `tf.print()` with `OrderedDict`: if the keys were not sorted, the keys and values were printed with a mismatched mapping. * `tf.train.Checkpoint`: - * Now accepts a `root` argument in the initialization, which generates a checkpoint with a root object. This allows users to create a `Checkpoint` object that is compatible with Keras `model.save_weights()` and `model.load_weights`. The checkpoint is also compatible with the checkpoint saved in the `variables/` folder in the SavedModel. + * Now accepts a `root` argument in the initialization, which generates a checkpoint with a root object. This allows users to create a `Checkpoint` object that is compatible with Keras `model.save_weights()` and `model.load_weights`. The checkpoint is also compatible with the checkpoint saved in the `variables/` folder in the SavedModel. * When restoring, `save_path` can be a path to a SavedModel. The function will automatically find the checkpoint in the SavedModel.
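+        A minimal sketch of the new `root` argument (the model and path are placeholders):
+
+        ```python
+        import tensorflow as tf
+
+        model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
+        ckpt = tf.train.Checkpoint(root=model)  # layout matches model.save_weights()
+        path = ckpt.save("/tmp/demo_ckpt")      # hypothetical location
+        ckpt.restore(path)
+        ```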
- + ### `tf.data`: * Adds new `tf.data.experimental.service.register_dataset` and `tf.data.experimental.service.from_dataset_id` APIs to enable one process to register a dataset with the tf.data service, and another process to consume data from the dataset. @@ -138,7 +364,7 @@ * Adds support for a new "distributed_epoch" processing mode. This processing mode distributes a dataset across all tf.data workers, instead of having each worker process the full dataset. See [the tf.data service docs](https://www.tensorflow.org/api_docs/python/tf/data/experimental/service#understand_processing_mode) to learn more. * Adds optional `exclude_cols` parameter to CsvDataset. This parameter is the complement of `select_cols`; at most one of these should be specified. - * We have implemented an optimization which reorders data-discarding transformations such as `take` and `shard` to happen earlier in the dataset when it is safe to do so. The optimization can be disabled via the `experimental_optimization.reorder_data_discarding_ops` dataset option. + * We have implemented an optimization which reorders data-discarding transformations such as `take` and `shard` to happen earlier in the dataset when it is safe to do so. The optimization can be disabled via the `experimental_optimization.reorder_data_discarding_ops` dataset option. * `tf.data.Options` were previously immutable and can now be overridden. * `tf.data.Dataset.from_generator` now supports Ragged and Sparse tensors with a new `output_signature` argument, which allows `from_generator` to produce any type describable by a `tf.TypeSpec`. @@ -149,8 +375,8 @@ * Replaces the existing `tf.distribute.experimental.ParameterServerStrategy` symbol with a new class for parameter server training in TF2. Usage of the old symbol, usually with Estimator API, should be **replaced** with `tf.compat.v1.distribute.experimental.ParameterServerStrategy`. * Added `tf.distribute.experimental.coordinator.*` namespace, including the main API `ClusterCoordinator` for coordinating the training cluster, and the related data structures `RemoteValue` and `PerWorkerValue`. - * `MultiWorkerMirroredStrategy`](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) is now a stable API and is no longer considered experimental. Some of the major improvements involve handling peer failure and many bug fixes. Please check out the detailed tutorial on - [Multi-worer training with Keras](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras). + * [`MultiWorkerMirroredStrategy`](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy) is now a stable API and is no longer considered experimental. Some of the major improvements involve handling peer failure and many bug fixes. Please check out the detailed tutorial on + [Multi-worker training with Keras](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras). * Adds `tf.distribute.Strategy.gather` and `tf.distribute.ReplicaContext.all_gather` APIs to support gathering dense distributed values. * Fixes various issues with saving a distributed model. @@ -165,7 +391,7 @@ * Error messages when Functional API construction goes wrong (and when ops cannot be converted to Keras layers automatically) should be clearer and easier to understand. * `Optimizer.minimize` can now accept a loss `Tensor` and a `GradientTape` as an alternative to accepting a `callable` loss.
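+    A minimal sketch of the new tape-based form (toy model and data):
+
+    ```python
+    import tensorflow as tf
+
+    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
+    opt = tf.keras.optimizers.SGD(0.1)
+    x, y = tf.random.normal([8, 4]), tf.random.normal([8, 1])
+
+    with tf.GradientTape() as tape:
+        loss = tf.reduce_mean(tf.square(model(x) - y))
+    # A loss Tensor plus the tape now works in place of a callable loss.
+    opt.minimize(loss, model.trainable_variables, tape=tape)
+    ```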
- * Adds `beta` hyperparameter to [FTRL](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Ftrl) optimizer classes (Keras and others) to match [FTRL paper](https://research.google.com/pubs/archive/41159.pdf). + * Adds `beta` hyperparameter to [FTRL](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Ftrl) optimizer classes (Keras and others) to match [FTRL paper](https://research.google.com/pubs/archive/41159.pdf). * `Optimizer.__init__` now accepts a `gradient_aggregator` to allow for customization of how gradients are aggregated across devices, as well as `gradient_transformers` to allow for custom gradient transformations (such as gradient clipping). * Improvements to Keras preprocessing layers: * TextVectorization can now accept a vocabulary list or file as an init arg. @@ -174,15 +400,15 @@ True, the layer returns the attention scores as an additional output argument. * Adds `tf.metrics.log_cosh` and `tf.metrics.logcosh` API entrypoints with the same implementation as their `tf.losses` equivalents. * For Keras models, individual calls to `Model.evaluate` do not use cached data, while `Model.fit` caches data when the `validation_data` argument is provided, for better performance. - * Adds a `save_traces` argument to `model.save`/ `tf.keras.models.save_model` which determines whether the SavedModel format stores the Keras model/layer call functions. The traced functions allow Keras to revive custom models and layers without the original class definition, but if this isn't required the tracing can be disabled with the added option. + * Adds a `save_traces` argument to `model.save`/`tf.keras.models.save_model` which determines whether the SavedModel format stores the Keras model/layer call functions. The traced functions allow Keras to revive custom models and layers without the original class definition, but if this isn't required the tracing can be disabled with the added option. * The `tf.keras.mixed_precision` API is now non-experimental. The non-experimental API differs from the experimental API in several ways. - * `tf.keras.mixed_precision.Policy` no longer takes in a `tf.mixed_precision.experimental.LossScale` in the constructor, and no longer has a `LossScale` associated with it. Instead, `Model.compile` will automatically wrap the optimizer with a `LossScaleOptimizer` using dynamic loss scaling if `Policy.name` is "mixed_float16". - * `tf.keras.mixed_precision.LossScaleOptimizer`'s constructor takes in different arguments. In particular, it no longer takes in a `LossScale`, and there is no longer a `LossScale` associated with the `LossScaleOptimizer`. Instead, `LossScaleOptimizer` directly implements fixed or dynamic loss scaling. See the documentation of [`tf.keras.mixed_precision.experimental.LossScaleOptimizer`](https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/experimental/LossScaleOptimizer?version=nightly) for details on the differences between the experimental `LossScaleOptimizer` and the new non-experimental `LossScaleOptimizer`. - * `tf.mixed_precision.experimental.LossScale` and its subclasses are deprecated, as all of its functionality now exists within `tf.keras.mixed_precision.LossScaleOptimizer` + * `tf.keras.mixed_precision.Policy` no longer takes in a `tf.mixed_precision.experimental.LossScale` in the constructor, and no longer has a `LossScale` associated with it. 
Instead, `Model.compile` will automatically wrap the optimizer with a `LossScaleOptimizer` using dynamic loss scaling if `Policy.name` is `mixed_float16`. + * `tf.keras.mixed_precision.LossScaleOptimizer`'s constructor takes in different arguments. In particular, it no longer takes in a `LossScale`, and there is no longer a `LossScale` associated with the `LossScaleOptimizer`. Instead, `LossScaleOptimizer` directly implements fixed or dynamic loss scaling. See the documentation of [`tf.keras.mixed_precision.experimental.LossScaleOptimizer`](https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/experimental/LossScaleOptimizer?version=nightly) for details on the differences between the experimental `LossScaleOptimizer` and the new non-experimental `LossScaleOptimizer`. + * `tf.mixed_precision.experimental.LossScale` and its subclasses are deprecated, as all of their functionality now exists within `tf.keras.mixed_precision.LossScaleOptimizer`. ### `tf.lite`: * `TFLiteConverter`: - * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (`tf.int8`, `tf.uint8`) instead of defaulting to float type (`tf.float32`). + * Supports optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (`tf.int8`, `tf.uint8`) instead of defaulting to the float type (`tf.float32`). * NNAPI * Adds NNAPI Delegation support for requantization use cases by converting the operation into a dequantize-quantize pair. * Removes deprecated `Interpreter.setUseNNAPI(boolean)` Java API. Use `Interpreter.Options.setUseNNAPI` instead. @@ -192,19 +418,19 @@ * GPU acceleration now supports quantized models by default * `DynamicBuffer::AddJoinedString()` will now add a separator if the first string to be joined is empty. * Adds support for cumulative sum (cumsum), both as a builtin op and as an MLIR conversion. - + ### `TensorRT` * Issues a warning when the `session_config` parameter for the TF1 converter is used or the `rewrite_config_template` field in the TF2 converter parameter object is used. - + ### TPU Enhancements: * Adds support for the `beta` parameter of the FTRL optimizer for TPU embeddings. Users of other TensorFlow platforms can implement equivalent behavior by adjusting the `l2` parameter. ### XLA Support: - * xla.experimental.compile is deprecated, use `tf.function(experimental_compile=True)` instead. + * `xla.experimental.compile` is deprecated; use `tf.function(experimental_compile=True)` instead. * Adds `tf.function.experimental_get_compiler_ir` which returns compiler IR (currently 'hlo' and 'optimized_hlo') for a given function and input (see the sketch below). 
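As a sketch of the migration path and the new introspection hook (the function `f` below is a hypothetical example, and the exact set of supported stages may vary by build):

```python
import tensorflow as tf

# Compile with XLA via tf.function instead of the deprecated
# xla.experimental.compile.
@tf.function(experimental_compile=True)
def f(x):
    return x * x + 1.0

x = tf.constant([1.0, 2.0])
# experimental_get_compiler_ir returns a callable that emits the
# compiler IR ('hlo' or 'optimized_hlo') for the given inputs.
print(f.experimental_get_compiler_ir(x)(stage='hlo'))
```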
- + ### Security: * Fixes an undefined behavior causing a segfault in `tf.raw_ops.Switch` ([CVE-2020-15190](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15190)) * Fixes three vulnerabilities in conversion to DLPack format @@ -241,7 +467,7 @@ * Fixes a lack of validation in `tf.raw_ops.DataFormatVecPermute` and `tf.raw_ops.DataFormatDimMap` which can cause uninitialized memory access, read outside bounds of arrays, data corruption and segmentation faults ([CVE-2020-26267](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26267)) * Fixes a crash caused by writing to a read-only memory region ([CVE-2020-26268](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26268)) * Fixes a heap out-of-bounds access in the filesystem globbing implementation ([CVE-2020-26269](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26269)) - + ### Other: * We have replaced uses of "whitelist" and "blacklist" with "allowlist" and "denylist" where possible. Please see [this list](https://developers.google.com/style/word-list#blacklist) for more context. * Adds `tf.config.experimental.mlir_bridge_rollout` which will help us roll out the new MLIR TPU bridge. diff --git a/configure.py b/configure.py index b4907775d93092..e5428a699732cf 100644 --- a/configure.py +++ b/configure.py @@ -526,7 +526,12 @@ def set_cc_opt_flags(environ_cp): elif is_windows(): default_cc_opt_flags = '/arch:AVX' else: - default_cc_opt_flags = '-march=native -Wno-sign-compare' + # On all other platforms, no longer use `-march=native` as this can result + # in instructions that are too modern being generated. Users that want + # maximum performance should compile TF in their environment and can pass + # `-march=native` there. + # See https://github.com/tensorflow/tensorflow/issues/45744 and duplicates + default_cc_opt_flags = '-Wno-sign-compare' question = ('Please specify optimization flags to use during compilation when' ' bazel option "--config=opt" is specified [Default is %s]: ' ) % default_cc_opt_flags @@ -534,9 +539,7 @@ def set_cc_opt_flags(environ_cp): question, default_cc_opt_flags) for opt in cc_opt_flags.split(): write_to_bazelrc('build:opt --copt=%s' % opt) - # It should be safe on the same build host. - if not is_ppc64le() and not is_windows(): - write_to_bazelrc('build:opt --host_copt=-march=native') + write_to_bazelrc('build:opt --host_copt=%s' % opt) write_to_bazelrc('build:opt --define with_default_optimizations=true') diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index 99a278a14a4b37..a1c2ce5eb6a21c 100644 --- a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -116,7 +116,8 @@ # Get sitepackages directories for the python installation. 
_site_packages_dirs = [] -_site_packages_dirs += [] if _site.USER_SITE is None else [_site.USER_SITE] +if _site.ENABLE_USER_SITE and _site.USER_SITE is not None: + _site_packages_dirs += [_site.USER_SITE] _site_packages_dirs += [_p for _p in _sys.path if 'site-packages' in _p] if 'getsitepackages' in dir(_site): _site_packages_dirs += _site.getsitepackages() diff --git a/tensorflow/c/experimental/filesystem/modular_filesystem.cc b/tensorflow/c/experimental/filesystem/modular_filesystem.cc index 9c8d3518800b6b..3fdeaf32eeba57 100644 --- a/tensorflow/c/experimental/filesystem/modular_filesystem.cc +++ b/tensorflow/c/experimental/filesystem/modular_filesystem.cc @@ -133,7 +133,7 @@ bool ModularFileSystem::FilesExist(const std::vector& files, TransactionToken* token, std::vector* status) { if (ops_->paths_exist == nullptr) - return FileSystem::FilesExist(files, status); + return FileSystem::FilesExist(files, token, status); std::vector translated_names; translated_names.reserve(files.size()); @@ -234,7 +234,7 @@ Status ModularFileSystem::DeleteRecursively(const std::string& dirname, "`undeleted_dirs` set to NULL"); if (ops_->delete_recursively == nullptr) - return FileSystem::DeleteRecursively(dirname, undeleted_files, + return FileSystem::DeleteRecursively(dirname, token, undeleted_files, undeleted_dirs); UniquePtrTo_TF_Status plugin_status(TF_NewStatus(), TF_DeleteStatus); @@ -264,7 +264,7 @@ Status ModularFileSystem::DeleteDir(const std::string& dirname, Status ModularFileSystem::RecursivelyCreateDir(const std::string& dirname, TransactionToken* token) { if (ops_->recursively_create_dir == nullptr) - return FileSystem::RecursivelyCreateDir(dirname); + return FileSystem::RecursivelyCreateDir(dirname, token); UniquePtrTo_TF_Status plugin_status(TF_NewStatus(), TF_DeleteStatus); std::string translated_name = TranslateName(dirname); @@ -312,7 +312,8 @@ Status ModularFileSystem::Stat(const std::string& fname, Status ModularFileSystem::IsDirectory(const std::string& name, TransactionToken* token) { - if (ops_->is_directory == nullptr) return FileSystem::IsDirectory(name); + if (ops_->is_directory == nullptr) + return FileSystem::IsDirectory(name, token); UniquePtrTo_TF_Status plugin_status(TF_NewStatus(), TF_DeleteStatus); std::string translated_name = TranslateName(name); @@ -362,7 +363,8 @@ Status ModularFileSystem::RenameFile(const std::string& src, Status ModularFileSystem::CopyFile(const std::string& src, const std::string& target, TransactionToken* token) { - if (ops_->copy_file == nullptr) return FileSystem::CopyFile(src, target); + if (ops_->copy_file == nullptr) + return FileSystem::CopyFile(src, target, token); UniquePtrTo_TF_Status plugin_status(TF_NewStatus(), TF_DeleteStatus); std::string translated_src = TranslateName(src); diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize.cc b/tensorflow/compiler/mlir/lite/transforms/optimize.cc index 4317db5957b353..5c83b7c98824c2 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize.cc @@ -61,6 +61,9 @@ constexpr char kRelu6[] = "RELU6"; constexpr char kRelu1[] = "RELU_N1_TO_1"; bool L2NormalizeReduceAxis(Value sq_op, DenseElementsAttr axis) { + if (axis.getNumElements() == 0) { + return false; + } if (sq_op.getType().cast().getRank() - 1 == *axis.getValues().begin() || *axis.getValues().begin() == -1) { diff --git a/tensorflow/core/common_runtime/constant_folding.cc b/tensorflow/core/common_runtime/constant_folding.cc index 384ec836cdf9b4..64ed1c398ada30 100644 --- 
a/tensorflow/core/common_runtime/constant_folding.cc +++ b/tensorflow/core/common_runtime/constant_folding.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/core/framework/log_memory.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/subgraph.h" @@ -223,7 +224,8 @@ bool IsConstantFoldable( std::unordered_map>* shape_replacement_map) { if (n->IsConstant()) { - return true; + // Skip constant folding resources as they cannot be deep copied. + return n->output_type(0) != DT_RESOURCE; } if (MaybeReplaceShapeOp(n, shape_map, shape_replacement_map)) { return true; diff --git a/tensorflow/core/common_runtime/immutable_executor_state.cc b/tensorflow/core/common_runtime/immutable_executor_state.cc index 03d12a0e98abd5..3cde56bc85ab81 100644 --- a/tensorflow/core/common_runtime/immutable_executor_state.cc +++ b/tensorflow/core/common_runtime/immutable_executor_state.cc @@ -315,6 +315,10 @@ Status ImmutableExecutorState::BuildControlFlowInfo(const Graph* g, } else if (IsExit(curr_node)) { // Exit to the parent frame. parent = parent_nodes[curr_id]; + if (!parent) { + return errors::InvalidArgument( + "Invalid Exit op: Cannot find a corresponding Enter op."); + } frame_name = cf_info->frame_names[parent->id()]; parent = parent_nodes[parent->id()]; } else { diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index 375f809b31b369..ec655b2acd0184 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -120,9 +120,26 @@ Status ShapeRefiner::InferShapesForFunctionSubNode( TF_RETURN_IF_ERROR(outer_context->MakeShapeFromShapeProto(proto, &handle)); outer_context->set_output(index, handle); - auto* resource = node_context->input_handle_shapes_and_types(0); + const std::vector* resource = + node_context->input_handle_shapes_and_types(0); if (resource) { - outer_context->set_output_handle_shapes_and_types(index, *resource); + // `ShapesAndType`s contain `ShapeHandle`s. These `ShapeHandle`s point + // to `Shape`s that are owned by a different inference context too. We + // need to copy them to the outer context to prevent them from being + // destroyed before they are used. + std::vector copied_shapes_and_types; + for (auto& shape_and_type : *resource) { + ShapeHandle handle; + TensorShapeProto proto; + node_context->ShapeHandleToProto(shape_and_type.shape, &proto); + TF_RETURN_IF_ERROR( + outer_context->MakeShapeFromShapeProto(proto, &handle)); + copied_shapes_and_types.push_back( + ShapeAndType(handle, shape_and_type.dtype, shape_and_type.specialized_type)); + } + + outer_context->set_output_handle_shapes_and_types( + index, copied_shapes_and_types); } } diff --git a/tensorflow/core/data/compression_utils.cc b/tensorflow/core/data/compression_utils.cc index bbff3a96667d13..40238a05a2614b 100644 --- a/tensorflow/core/data/compression_utils.cc +++ b/tensorflow/core/data/compression_utils.cc @@ -29,9 +29,10 @@ Status CompressElement(const std::vector& element, int64 total_size = 0; for (auto& component : element) { if (DataTypeCanUseMemcpy(component.dtype())) { - // Some datatypes can be memcopied, allowing us to save two copies - // (AsProtoTensorContent and SerializeToArray). 
- total_size += DMAHelper::buffer(&component)->size(); + const TensorBuffer* buffer = DMAHelper::buffer(&component); + if (buffer) { + total_size += buffer->size(); + } } else { non_memcpy_components.emplace_back(); component.AsProtoTensorContent(&non_memcpy_components.back()); @@ -53,8 +54,10 @@ Status CompressElement(const std::vector& element, component.shape().AsProto(metadata->mutable_tensor_shape()); if (DataTypeCanUseMemcpy(component.dtype())) { const TensorBuffer* buffer = DMAHelper::buffer(&component); - memcpy(position, buffer->data(), buffer->size()); - metadata->set_tensor_size_bytes(buffer->size()); + if (buffer) { + memcpy(position, buffer->data(), buffer->size()); + metadata->set_tensor_size_bytes(buffer->size()); + } } else { TensorProto& proto = non_memcpy_components[non_memcpy_component_index++]; proto.SerializeToArray(position, proto.ByteSizeLong()); @@ -94,8 +97,13 @@ Status UncompressElement(const CompressedElement& compressed, if (DataTypeCanUseMemcpy(metadata.dtype())) { out->emplace_back(metadata.dtype(), metadata.tensor_shape()); TensorBuffer* buffer = DMAHelper::buffer(&out->back()); - iov[i].iov_base = buffer->data(); - iov[i].iov_len = buffer->size(); + if (buffer) { + iov[i].iov_base = buffer->data(); + iov[i].iov_len = buffer->size(); + } else { + iov[i].iov_base = nullptr; + iov[i].iov_len = 0; + } } else { // Allocate an empty Tensor. We will fill it out later after // uncompressing into the tensor_proto_str. diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index de196f20da97d8..4c65aa9fd5322a 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -680,7 +680,9 @@ cc_library( "//tensorflow/core/lib/gtl:inlined_vector", "//tensorflow/core/lib/strings:str_util", "//tensorflow/core/lib/strings:strcat", + "//tensorflow/core/platform:errors", "//tensorflow/core/platform:logging", + "//tensorflow/core/platform:macros", "//tensorflow/core/util:overflow", "//third_party/eigen3", ], @@ -785,6 +787,7 @@ tf_cuda_library( "//tensorflow/core/lib/strings:str_util", "//tensorflow/core/lib/strings:strcat", "//tensorflow/core/platform:abi", + "//tensorflow/core/platform:errors", "//tensorflow/core/platform:logging", "//tensorflow/core/platform:macros", "//tensorflow/core/platform:platform_port", diff --git a/tensorflow/core/framework/attr_value_util.cc b/tensorflow/core/framework/attr_value_util.cc index 712e205c587c84..76fe36e7f1e2a6 100644 --- a/tensorflow/core/framework/attr_value_util.cc +++ b/tensorflow/core/framework/attr_value_util.cc @@ -38,6 +38,9 @@ namespace { // Do not construct large tensors to compute their hash or compare for equality. constexpr int kMaxAttrValueTensorByteSize = 32 * 1024 * 1024; // 32mb +// Limit nesting of tensors to 100 deep to prevent memory overflow. +constexpr int kMaxTensorNestDepth = 100; + // Return the size of the tensor represented by this TensorProto. If shape is // not fully defined return -1. 
int64 TensorByteSize(const TensorProto& t) { @@ -224,6 +227,54 @@ string SummarizeFunc(const NameAttrList& func) { return strings::StrCat(func.name(), "[", absl::StrJoin(entries, ", "), "]"); } +bool ParseAttrValueHelper_TensorNestsUnderLimit(int limit, string to_parse) { + int nests = 0; + int maxed_out = to_parse.length(); + int open_curly = to_parse.find('{'); + int open_bracket = to_parse.find('<'); + int close_curly = to_parse.find('}'); + int close_bracket = to_parse.find('>'); + if (open_curly == -1) { + open_curly = maxed_out; + } + if (open_bracket == -1) { + open_bracket = maxed_out; + } + int min = std::min(open_curly, open_bracket); + do { + if (open_curly == maxed_out && open_bracket == maxed_out) { + return true; + } + if (min == open_curly) { + nests += 1; + open_curly = to_parse.find('{', open_curly + 1); + if (open_curly == -1) { + open_curly = maxed_out; + } + } else if (min == open_bracket) { + nests += 1; + open_bracket = to_parse.find('<', open_bracket + 1); + if (open_bracket == -1) { + open_bracket = maxed_out; + } + } else if (min == close_curly) { + nests -= 1; + close_curly = to_parse.find('}', close_curly + 1); + if (close_curly == -1) { + close_curly = maxed_out; + } + } else if (min == close_bracket) { + nests -= 1; + close_bracket = to_parse.find('>', close_bracket + 1); + if (close_bracket == -1) { + close_bracket = maxed_out; + } + } + min = std::min({open_curly, open_bracket, close_curly, close_bracket}); + } while (nests < 100); + return false; +} + } // namespace string SummarizeAttrValue(const AttrValue& attr_value) { @@ -448,7 +499,12 @@ bool ParseAttrValue(StringPiece type, StringPiece text, AttrValue* out) { } else { to_parse = strings::StrCat(field_name, ": ", text); } - + if (field_name == "tensor") { + if (!ParseAttrValueHelper_TensorNestsUnderLimit(kMaxTensorNestDepth, + to_parse)) { + return false; + } + } return ProtoParseFromString(to_parse, out); } diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index 60c95e04799d13..f57c474fb579bb 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -664,6 +664,8 @@ Status Conv2DShapeImpl(shape_inference::InferenceContext* c, if (c->ValueKnown(input_depth_dim) && c->ValueKnown(filter_input_depth_dim)) { int64 input_depth_value = c->Value(input_depth_dim), filter_input_depth_value = c->Value(filter_input_depth_dim); + if (filter_input_depth_value == 0) + return errors::InvalidArgument("Depth of filter must not be 0"); if (input_depth_value % filter_input_depth_value != 0) return errors::InvalidArgument( "Depth of input (", input_depth_value, @@ -673,6 +675,8 @@ Status Conv2DShapeImpl(shape_inference::InferenceContext* c, int64 num_groups = input_depth_value / filter_input_depth_value; if (c->ValueKnown(output_depth_dim)) { int64 output_depth_value = c->Value(output_depth_dim); + if (num_groups == 0) + return errors::InvalidArgument("Number of groups must not be 0"); if (output_depth_value % num_groups != 0) return errors::InvalidArgument( "Depth of output (", output_depth_value, @@ -803,6 +807,8 @@ Status Conv3DShape(shape_inference::InferenceContext* c) { if (c->ValueKnown(input_depth_dim) && c->ValueKnown(filter_input_depth_dim)) { int64 input_depth_value = c->Value(input_depth_dim), filter_input_depth_value = c->Value(filter_input_depth_dim); + if (filter_input_depth_value == 0) + return errors::InvalidArgument("Depth of filter must not be 0"); if (input_depth_value % 
filter_input_depth_value != 0) return errors::InvalidArgument( "Depth of input (", input_depth_value, @@ -812,6 +818,8 @@ Status Conv3DShape(shape_inference::InferenceContext* c) { int64 num_groups = input_depth_value / filter_input_depth_value; if (c->ValueKnown(output_depth_dim)) { int64 output_depth_value = c->Value(output_depth_dim); + if (num_groups == 0) + return errors::InvalidArgument("Number of groups must not be 0"); if (output_depth_value % num_groups != 0) return errors::InvalidArgument( "Depth of output (", output_depth_value, @@ -2424,6 +2432,9 @@ Status SparseReduceShapeFn(InferenceContext* c) { int64 ndims = shape_vec.size(); absl::flat_hash_set<int64> axes; + if (ndims == 0) + return errors::InvalidArgument( + "Number of dims in shape tensor must not be 0"); for (int i = 0; i < axes_vec.size(); i++) { axes.insert((axes_vec(i) + ndims) % ndims); } diff --git a/tensorflow/core/framework/lookup_interface.cc b/tensorflow/core/framework/lookup_interface.cc index 117adbf65c42cd..77d3314b3e8440 100644 --- a/tensorflow/core/framework/lookup_interface.cc +++ b/tensorflow/core/framework/lookup_interface.cc @@ -83,10 +83,17 @@ Status LookupInterface::CheckFindArguments(const Tensor& key, const Tensor& default_value) { TF_RETURN_IF_ERROR(CheckKeyAndValueTypes(key, default_value)); TF_RETURN_IF_ERROR(CheckKeyShape(key.shape())); - if (default_value.shape() != value_shape()) { + TensorShape fullsize_value_shape = key.shape(); + for (int i = 0; i < key_shape().dims(); ++i) { + fullsize_value_shape.RemoveDim(fullsize_value_shape.dims() - 1); + } + fullsize_value_shape.AppendShape(value_shape()); + if (default_value.shape() != value_shape() && + default_value.shape() != fullsize_value_shape) { return errors::InvalidArgument( - "Expected shape ", value_shape().DebugString(), - " for default value, got ", default_value.shape().DebugString()); + "Expected shape ", value_shape().DebugString(), " or ", + fullsize_value_shape.DebugString(), " for default value, got ", + default_value.shape().DebugString()); } return Status::OK(); } diff --git a/tensorflow/core/framework/lookup_interface.h b/tensorflow/core/framework/lookup_interface.h index 7e5dbe5632becb..04c5b0e4dc60a4 100644 --- a/tensorflow/core/framework/lookup_interface.h +++ b/tensorflow/core/framework/lookup_interface.h @@ -128,7 +128,8 @@ class LookupInterface : public ResourceBase { // requirements are satisfied, otherwise it returns InvalidArgument: // - DataType of the tensor keys equals to the table key_dtype // - DataType of the tensor default_value equals to the table value_dtype - // - the default_value tensor shape matches the table's value shape. + // - the default_value tensor has the required shape given keys and the + // table's value shape. 
Status CheckFindArguments(const Tensor& keys, const Tensor& default_value); string DebugString() const override { diff --git a/tensorflow/core/framework/partial_tensor_shape_test.cc b/tensorflow/core/framework/partial_tensor_shape_test.cc index 54ae019f9b4812..2b6d417bfbc452 100644 --- a/tensorflow/core/framework/partial_tensor_shape_test.cc +++ b/tensorflow/core/framework/partial_tensor_shape_test.cc @@ -65,6 +65,19 @@ TEST(PartialTensorShapeTest, Concatenate) { EXPECT_EQ(-1, s4.num_elements()); } +TEST(PartialTensorShapeTest, ConcatenateWithStatus) { + PartialTensorShape s({10, 5, 20}); + Status status = s.ConcatenateWithStatus(400, &s); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(400000, s.num_elements()); + ASSERT_EQ(4, s.dims()); + + status = s.ConcatenateWithStatus(-10, &s); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(-1, s.num_elements()); + ASSERT_EQ(5, s.dims()); +} + TEST(PartialTensorShapeTest, InvalidShapeProto) { TensorShapeProto proto; EXPECT_TRUE(PartialTensorShape::IsValid(proto)); diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index 03499ec0220650..a611b7a7f76029 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -48,6 +48,7 @@ limitations under the License. #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/protobuf.h" @@ -725,11 +726,11 @@ bool Tensor::RefCountIsOne() const { // The macro CASES() expands to a switch statement conditioned on // TYPE_ENUM. Each case expands the STMTS after a typedef for T. #define SINGLE_ARG(...) __VA_ARGS__ -#define CASE(TYPE, STMTS) \ - case DataTypeToEnum::value: { \ - typedef TYPE T; \ - STMTS; \ - break; \ +#define CASE(TYPE, STMTS) \ + case DataTypeToEnum::value: { \ + typedef TF_ATTRIBUTE_UNUSED TYPE T; \ + STMTS; \ + break; \ } #define CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, INVALID, DEFAULT) \ switch (TYPE_ENUM) { \ @@ -765,9 +766,8 @@ bool Tensor::RefCountIsOne() const { } #define CASES(TYPE_ENUM, STMTS) \ - CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, \ - LOG(FATAL) << "Unexpected type: " << TYPE_ENUM; \ - , LOG(FATAL) << "Type not set";) + CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << "Type not set"; \ + , LOG(FATAL) << "Unexpected type: " << TYPE_ENUM;) Tensor::Tensor(Allocator* a, DataType type, const TensorShape& shape) : shape_(shape), buf_(nullptr) { @@ -797,6 +797,16 @@ Tensor::Tensor(Allocator* a, DataType type, const TensorShape& shape, } } +Status Tensor::BuildTensor(DataType type, const TensorShape& shape, + Tensor* out_tensor) { + // Avoid crashes due to invalid or unsupported types. + CASES_WITH_DEFAULT( + type, {}, return errors::InvalidArgument("Type not set"), + return errors::InvalidArgument("Unexpected type: ", DataType_Name(type))); + *out_tensor = Tensor(type, shape); + return Status::OK(); +} + // NOTE(mrry): The default allocator for a Tensor (when none is specified) is // the default CPU allocator for NUMA zone 0. 
Accessing that currently involves // acquiring a lock, which guards initialization of the per-NUMA zone diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h index 6aa079e8b5c53c..50b6c0953ac3ad 100644 --- a/tensorflow/core/framework/tensor.h +++ b/tensorflow/core/framework/tensor.h @@ -164,6 +164,15 @@ class Tensor { /// for details. explicit Tensor(DataType type); + /// \brief Initializes a tensor with the input `type` and `shape`, or returns + /// an error and leaves `out_tensor` unmodified. This factory method should be + /// used instead of the corresponding constructor if calling code cannot + /// validate that the `DataType` is valid and supported. + /// + /// The underlying buffer is allocated using a `CPUAllocator`. + static Status BuildTensor(DataType type, const TensorShape& shape, + Tensor* out_tensor); + private: // A tag type for selecting the `Tensor` constructor overload that creates a // scalar tensor in host memory. diff --git a/tensorflow/core/framework/tensor_shape.cc b/tensorflow/core/framework/tensor_shape.cc index b564ac144ccab2..5144577e7aa0f5 100644 --- a/tensorflow/core/framework/tensor_shape.cc +++ b/tensorflow/core/framework/tensor_shape.cc @@ -20,7 +20,9 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/overflow.h" namespace tensorflow { @@ -153,11 +155,44 @@ TensorShapeBase::TensorShapeBase(const TensorShapeProto& proto) { } } +template +Status TensorShapeBase::BuildTensorShapeBase( + const TensorShapeProto& proto, TensorShapeBase* out) { + out->set_tag(REP16); + out->set_data_type(DT_INVALID); + // NOTE(irving): Unfortunately, TensorShape allows parsing protos with + // unknown_shape() set, and it seems hard to remove this without backwards + // compatibility issues. + if (kIsPartial && proto.unknown_rank()) { + out->set_ndims_byte(kUnknownRank); + out->set_num_elements(-1); + } else { + out->set_ndims_byte(0); + out->set_num_elements(1); + Status s = Status::OK(); + for (const auto& d : proto.dim()) { + s = out->AddDimWithStatus(d.size()); + if (!s.ok()) { + return s; + } + } + } + return Status::OK(); +} + template TensorShapeBase::TensorShapeBase(gtl::ArraySlice dim_sizes) { set_tag(REP16); set_data_type(DT_INVALID); - InitDims(dim_sizes); + TF_CHECK_OK(InitDims(dim_sizes)); +} + +template +Status TensorShapeBase::BuildTensorShapeBase( + gtl::ArraySlice dim_sizes, TensorShapeBase* out) { + out->set_tag(REP16); + out->set_data_type(DT_INVALID); + return out->InitDims(dim_sizes); } // Returns true iff partial is true and val is < 0. 
@@ -169,15 +204,13 @@ static inline bool Set16(bool partial, uint16* dst, int dim, int64 val) { dst[dim] = std::numeric_limits::max(); return true; } - } else { - CHECK_GE(val, 0); } dst[dim] = val; return false; } template -void TensorShapeBase::InitDims(gtl::ArraySlice dim_sizes) { +Status TensorShapeBase::InitDims(gtl::ArraySlice dim_sizes) { DCHECK_EQ(tag(), REP16); // Allow sizes that are under kint64max^0.25 so that 4-way multiplication @@ -193,6 +226,15 @@ void TensorShapeBase::InitDims(gtl::ArraySlice dim_sizes) { } } + if (!kIsPartial && !large_size) { + for (auto s : dim_sizes) { + if (TF_PREDICT_FALSE(s < 0)) { + return errors::Internal( + "Expected shape dimensions to be non-negative, got ", s); + } + } + } + if (!large_size) { // Every size fits in 16 bits; use fast-paths for dims in {1,2,3,4}. uint16* dst = as16()->dims_; @@ -202,7 +244,7 @@ void TensorShapeBase::InitDims(gtl::ArraySlice dim_sizes) { const int64 size = dim_sizes[0]; const bool neg = Set16(kIsPartial, dst, 0, size); set_num_elements(neg ? -1 : size); - return; + return Status::OK(); } case 2: { set_ndims_byte(2); @@ -211,7 +253,7 @@ void TensorShapeBase::InitDims(gtl::ArraySlice dim_sizes) { bool neg = Set16(kIsPartial, dst, 0, size0); neg |= Set16(kIsPartial, dst, 1, size1); set_num_elements(neg ? -1 : (size0 * size1)); - return; + return Status::OK(); } case 3: { set_ndims_byte(3); @@ -222,7 +264,7 @@ void TensorShapeBase::InitDims(gtl::ArraySlice dim_sizes) { neg |= Set16(kIsPartial, dst, 1, size1); neg |= Set16(kIsPartial, dst, 2, size2); set_num_elements(neg ? -1 : (size0 * size1 * size2)); - return; + return Status::OK(); } case 4: { set_ndims_byte(4); @@ -235,16 +277,22 @@ void TensorShapeBase::InitDims(gtl::ArraySlice dim_sizes) { neg |= Set16(kIsPartial, dst, 2, size2); neg |= Set16(kIsPartial, dst, 3, size3); set_num_elements(neg ? 
-1 : (size0 * size1 * size2 * size3)); - return; + return Status::OK(); } } } set_ndims_byte(0); set_num_elements(1); + Status status = Status::OK(); for (int64 s : dim_sizes) { - AddDim(internal::SubtleMustCopy(s)); + status.Update(AddDimWithStatus(internal::SubtleMustCopy(s))); + if (!status.ok()) { + return status; + } } + + return status; } template @@ -322,10 +370,10 @@ void TensorShapeRep::ClearAllButDataType() { } template -void TensorShapeBase::RecomputeNumElements() { +Status TensorShapeBase::RecomputeNumElements() { if (unknown_rank()) { set_num_elements(-1); - return; + return Status::OK(); } int64 n = 1; for (auto dim : *this) { @@ -334,9 +382,14 @@ void TensorShapeBase::RecomputeNumElements() { break; } n = MultiplyWithoutOverflow(n, dim.size); - CHECK_LE(0, n); + if (TF_PREDICT_FALSE(n < 0)) { + return errors::InvalidArgument( + "Shape ", this->DebugString(), + " results in overflow when computing number of elements"); + } } set_num_elements(n); + return Status::OK(); } template @@ -354,6 +407,38 @@ void TensorShapeBase::AddDim(int64 size) { UnsafeAddDim(size, new_num_elements); } +template +Status TensorShapeBase::AddDimWithStatus(int64 size) { + if (!kIsPartial) { + if (TF_PREDICT_FALSE(size < 0)) { + return errors::Internal("Expected a non-negative size, got ", size); + } + } + + if (unknown_rank()) { + return Status::OK(); + } + + if (TF_PREDICT_FALSE(ndims_byte() >= MaxDimensions())) { + return errors::Internal("Too many dimensions in tensor"); + } + + int64 new_num_elements; + if (kIsPartial && (num_elements() < 0 || size < 0)) { + new_num_elements = -1; + } else { + new_num_elements = MultiplyWithoutOverflow(num_elements(), size); + if (TF_PREDICT_FALSE(new_num_elements < 0)) { + return errors::Internal("Encountered overflow when multiplying ", + num_elements(), " with ", size, + ", result: ", new_num_elements); + } + } + + UnsafeAddDim(size, new_num_elements); + return Status::OK(); +} + template void TensorShapeBase::UnsafeAddDim(int64 size, int64 new_num_elements) { const int nd = ndims_byte(); @@ -404,6 +489,19 @@ void TensorShapeBase::AppendShape(const TensorShapeBase& shape) { for (auto d : shape) AddDim(d.size); } +template +Status TensorShapeBase::AppendShapeWithStatus( + const TensorShapeBase& shape) { + Status s = Status::OK(); + for (auto d : shape) { + s.Update(AddDimWithStatus(d.size)); + if (!s.ok()) { + return s; + } + } + return s; +} + template void TensorShapeBase::InsertDim(int d, int64 size) { CHECK_GE(d, 0); @@ -419,6 +517,42 @@ void TensorShapeBase::InsertDim(int d, int64 size) { } } +template +Status TensorShapeBase::InsertDimWithStatus(int d, int64 size) { + if (!kIsPartial) { + if (TF_PREDICT_FALSE(size < 0)) { + return errors::Internal("Expected a non-negative size, got ", size); + } + } + + if (TF_PREDICT_FALSE(d < 0)) { + return errors::Internal("The insertion index must be non-negative, got ", + d); + } + if (TF_PREDICT_FALSE(d > dims())) { + return errors::Internal("The insertion index must be at most ", dims(), + " got ", d); + } + if (TF_PREDICT_FALSE(dims() >= MaxDimensions())) { + return errors::Internal("Shape has ", dims(), + " dimensions which is the maximum allowed"); + } + + gtl::InlinedVector vals; + AppendTo(*this, &vals); + vals.insert(vals.begin() + d, size); + ClearAllButDataType(); + + Status s = Status::OK(); + for (auto dval : vals) { + s.Update(AddDimWithStatus(dval)); + if (!s.ok()) { + return s; + } + } + return s; +} + template gtl::InlinedVector TensorShapeBase::dim_sizes() const { gtl::InlinedVector result; @@ -451,7 
+585,46 @@ void TensorShapeBase::set_dim(int d, int64 size) { AddDim(dval); } } - RecomputeNumElements(); + TF_CHECK_OK(RecomputeNumElements()); +} + +template +Status TensorShapeBase::SetDimWithStatus(int d, int64 size) { + if (TF_PREDICT_FALSE(d < 0)) { + return errors::Internal("Index must be non-negative, got ", d); + } + if (TF_PREDICT_FALSE(d >= dims())) { + return errors::Internal("Index must be less than ", dims(), ", got ", d); + } + if (TF_PREDICT_FALSE(size < 0)) { + return errors::Internal("Expected a non-negative size, got ", size); + } + + if (tag() == REP16 && size < kMaxRep16) { + as16()->dims_[d] = + kIsPartial && size < 0 ? kUnknownRep16 : static_cast(size); + } else if (tag() == REP32 && size < kMaxRep32) { + as32()->dims_[d] = + kIsPartial && size < 0 ? kUnknownRep32 : static_cast(size); + } else if (tag() == REP_OUT_OF_LINE) { + (*as64()->dims_)[d] = size; + } else { + // Must upgrade + gtl::InlinedVector vals; + AppendTo(*this, &vals); + vals[d] = size; + ClearAllButDataType(); + + Status s = Status::OK(); + for (auto dval : vals) { + s.Update(AddDimWithStatus(dval)); + if (!s.ok()) { + return s; + } + } + } + + return RecomputeNumElements(); } template @@ -471,7 +644,51 @@ void TensorShapeBase::RemoveDimRange(int begin, int end) { for (auto dval : vals) { AddDim(dval); } - RecomputeNumElements(); + TF_CHECK_OK(RecomputeNumElements()); +} + +template +Status TensorShapeBase::RemoveDimRangeWithStatus(int begin, int end) { + if (unknown_rank()) { + return Status::OK(); + } + + begin = begin < 0 ? dims() + begin + 1 : begin; + end = end < 0 ? dims() + end + 1 : end; + + if (TF_PREDICT_FALSE(begin < 0)) { + return errors::Internal("Start index must be non-negative, got ", begin); + } + if (TF_PREDICT_FALSE(begin > dims())) { + return errors::Internal("Start index must be less than ", dims(), ", got ", + begin); + } + if (TF_PREDICT_FALSE(end < 0)) { + return errors::Internal("End index must be non-negative, got ", end); + } + if (TF_PREDICT_FALSE(end > dims())) { + return errors::Internal("End index must be less than ", dims(), ", got ", + end); + } + + if (begin >= end) { + return Status::OK(); + } + + gtl::InlinedVector vals; + AppendTo(*this, &vals); + vals.erase(vals.begin() + begin, vals.begin() + end); + ClearAllButDataType(); + + Status s = Status::OK(); + for (auto dval : vals) { + s.Update(AddDimWithStatus(dval)); + if (!s.ok()) { + return s; + } + } + + return RecomputeNumElements(); } bool TensorShape::IsSameSize(const TensorShape& b) const { @@ -635,6 +852,12 @@ PartialTensorShape PartialTensorShape::Concatenate(int64 size) const { return out; } +Status PartialTensorShape::ConcatenateWithStatus( + int64 size, PartialTensorShape* out) const { + out = const_cast(this); + return out->AddDimWithStatus(size); +} + PartialTensorShape PartialTensorShape::Concatenate( const PartialTensorShape& shape) const { if (unknown_rank() || shape.unknown_rank()) { @@ -645,6 +868,21 @@ PartialTensorShape PartialTensorShape::Concatenate( return out; } +Status PartialTensorShape::ConcatenateWithStatus( + const PartialTensorShape& shape, PartialTensorShape* out) const { + if (unknown_rank() || shape.unknown_rank()) { + *out = PartialTensorShape(); + return Status::OK(); + } + out = const_cast(this); + for (auto dim : shape) { + Status s = out->AddDimWithStatus(dim.size); + if (!s.ok()) return s; + } + + return Status::OK(); +} + Status PartialTensorShape::MergeWith(const PartialTensorShape& shape, PartialTensorShape* result) const { if (unknown_rank()) { @@ -661,8 +899,14 @@ Status 
PartialTensorShape::MergeWith(const PartialTensorShape& shape, "PartialTensorShape: Incompatible ranks during merge: ", dims_, " vs. ", shape.dims()); } - CHECK(result != this); + + if (result == this) { + return errors::Internal( + "PartialTensorShape::MergeWith: cannot merge shape with itself"); + } + result->Clear(); + Status s = Status::OK(); for (int i = 0; i < dims_; ++i) { const int64 dim0 = dim_size(i); const int64 dim1 = shape.dim_size(i); @@ -671,7 +915,10 @@ Status PartialTensorShape::MergeWith(const PartialTensorShape& shape, "PartialTensorShape: Incompatible shapes during merge: ", DebugString(), " vs. ", shape.DebugString()); } - result->AddDim(dim0 >= 0 ? dim0 : dim1); + s.Update(result->AddDimWithStatus(dim0 >= 0 ? dim0 : dim1)); + if (!s.ok()) { + return s; + } } return Status::OK(); } diff --git a/tensorflow/core/framework/tensor_shape.h b/tensorflow/core/framework/tensor_shape.h index 70253f4e7c8e39..a690123f0ceaf9 100644 --- a/tensorflow/core/framework/tensor_shape.h +++ b/tensorflow/core/framework/tensor_shape.h @@ -26,7 +26,9 @@ limitations under the License. #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" namespace tensorflow { @@ -172,8 +174,22 @@ class TensorShapeBase : public TensorShapeRep { /// Construct an empty TensorShape, or an unknown rank PartialTensorShape TensorShapeBase(); + // TODO(mihaimaruseac): Mark this explicit in a subsequent change TensorShapeBase(const TensorShapeProto& proto); + // These factory methods should be used instead of the constructors that take + // an array of sizes if calling code cannot validate that the sizes specify a + // valid `TensorShape`. + // The value in `*out` is valid iff the returned value is `Status::OK`. + static Status BuildTensorShapeBase(gtl::ArraySlice<int64> dim_sizes, + TensorShapeBase* out); + static Status BuildTensorShapeBase(std::initializer_list<int64> dim_sizes, + TensorShapeBase* out) { + return BuildTensorShapeBase(gtl::ArraySlice<int64>(dim_sizes), out); + } + static Status BuildTensorShapeBase(const TensorShapeProto& proto, + TensorShapeBase* out); + /// Returns `true` iff `proto` is a valid tensor shape. // For TensorShape, the proto shape must be fully defined. static bool IsValid(const TensorShapeProto& proto); @@ -189,19 +205,37 @@ class TensorShapeBase : public TensorShapeRep { /// REQUIRES: `size >= 0` void AddDim(int64 size); + /// Same as `AddDim` but returns a `Status`. + /// Use if unsure whether `size >= 0`, to prevent `CHECK`-crashes. + Status AddDimWithStatus(int64 size); + /// Appends all the dimensions from `shape`. void AppendShape(const TensorShapeBase& shape); + /// Same as `AppendShape` but returns a `Status`. + /// Use if you cannot validate all invariants, to prevent `CHECK`-fail. + Status AppendShapeWithStatus(const TensorShapeBase& shape); + /// \brief Insert a dimension somewhere in the `TensorShape`. /// REQUIRES: `0 <= d <= dims()` /// REQUIRES: `size >= 0` void InsertDim(int d, int64 size); + /// Same as `InsertDim` but returns a `Status`. + /// Use if unsure whether the requirements in `InsertDim` are satisfied, to prevent + /// `CHECK`-fail crashes. 
+ Status InsertDimWithStatus(int d, int64 size); + /// \brief Modifies the size of the dimension `d` to be `size` /// REQUIRES: `0 <= d < dims()` /// REQUIRES: `size >= 0` void set_dim(int d, int64 size); + /// Same as `set_dim` but returns a `Status`. + /// Use if unsure whether the requirements in `set_dim` are satisfied, to prevent + /// `CHECK`-fail crashes. + Status SetDimWithStatus(int d, int64 size); + /// \brief Removes dimension `d` from the `TensorShape`. /// REQUIRES: `0 <= d < dims()` void RemoveDim(int d) { @@ -209,6 +243,16 @@ RemoveDimRange(d, d + 1); } + /// Same as `RemoveDim` but returns a `Status`. + /// Use if unsure whether `0 <= d < dims()`, to prevent `CHECK`-crashes. + Status RemoveDimWithStatus(int64 d) { + if (TF_PREDICT_FALSE(d < 0)) { + return errors::Internal( + "Expected dimension index to be non-negative, got ", d); + } + return RemoveDimRangeWithStatus(d, d + 1); + } + /// \brief Removes last `n` dimensions from the `TensorShape`. /// REQUIRES: `0 <= n <= dims()` void RemoveLastDims(int n) { @@ -216,12 +260,28 @@ RemoveDimRange(dims() - n, dims()); } + /// Same as `RemoveLastDims` but returns a `Status`. + /// Use if unsure whether `0 <= n <= dims()`, to prevent `CHECK`-crashes. + Status RemoveLastDimsWithStatus(int64 n) { + if (TF_PREDICT_FALSE(n > dims())) { + return errors::Internal("Expected dimension index to be at most ", dims(), + " got ", n); + } + return RemoveDimRangeWithStatus(dims() - n, dims()); + } + /// \brief Removes the dimensions in range `[begin:end)` from `TensorShape`. /// Negative values of `end` are interpreted as `dims() + end + 1` (as in - /// Python). The same is true for negative values of `begin`. REQUIRES: - /// `-(dims()+1) <= begin <= dims()` REQUIRES: `-(dims()+1) <= end <= dims()` + /// Python). The same is true for negative values of `begin`. + /// REQUIRES: `-(dims()+1) <= begin <= dims()` + /// REQUIRES: `-(dims()+1) <= end <= dims()` void RemoveDimRange(int begin, int end); + /// Same as `RemoveDimRange` but returns a `Status`. + /// Use if unsure whether the requirements in `RemoveDimRange` are satisfied, to + /// prevent `CHECK`-fail crashes. + Status RemoveDimRangeWithStatus(int begin, int end); + /// Return whether the rank is unknown bool unknown_rank() const { return kIsPartial && ndims_byte() == kUnknownRank; @@ -263,8 +323,8 @@ explicit TensorShapeBase(DataType dt); private: - void RecomputeNumElements(); - void InitDims(gtl::ArraySlice<int64> dim_sizes); + Status RecomputeNumElements(); + Status InitDims(gtl::ArraySlice<int64> dim_sizes); // True for PartialTensorShape, false for TensorShape static constexpr bool kIsPartial = @@ -314,6 +374,13 @@ class TensorShape : public TensorShapeBase<TensorShape> { template <int NDIMS, typename IndexType = Eigen::DenseIndex> Eigen::DSizes<IndexType, NDIMS> AsEigenDSizes() const; + // Same as `AsEigenDSizes()` but returns a `Status` instead. + // Use this method to surface the error to the user instead of crashing if `NDIMS` is + // not equal to `dims()`. + // Caller must take ownership of `out`. + template <int NDIMS, typename IndexType = Eigen::DenseIndex> + Status AsEigenDSizesWithStatus(Eigen::DSizes<IndexType, NDIMS>* out) const; + /// Same as `AsEigenDSizes()` but allows for `NDIMS > dims()` -- in /// which case we pad the rest of the sizes with 1. /// Notice: Using IndexType=int32 in combination with To32Bit() can @@ -321,6 +388,14 @@ template <int NDIMS, typename IndexType = Eigen::DenseIndex> Eigen::DSizes<IndexType, NDIMS> AsEigenDSizesWithPadding() const; + // Same as `AsEigenDSizesWithPadding()` but returns a `Status` instead. 
+ // Use this method to surface the error to the user instead of crashing if `NDIMS` is + // not equal to `dims()`. + // Caller must take ownership of `out`. + template <int NDIMS, typename IndexType = Eigen::DenseIndex> + Status AsEigenDSizesWithPaddingWithStatus( + Eigen::DSizes<IndexType, NDIMS>* out) const; + private: // These CHECK fail to ease debugging. // REQUIRES: dims() == NDIMS @@ -328,6 +403,18 @@ // REQUIRES: dims() >= NDIMS void CheckDimsAtLeast(int NDIMS) const; + // Fill output from `*this`. + // Helper method for common code between `AsEigenDSizes()` and + // `AsEigenDSizesWithStatus()`. + template <int NDIMS, typename IndexType = Eigen::DenseIndex> + Eigen::DSizes<IndexType, NDIMS> AsEigenDSizesCopy() const; + + // Fill output from `*this`. + // Helper method for common code between `AsEigenDSizesWithPadding()` and + // `AsEigenDSizesWithPaddingWithStatus()`. + template <int NDIMS, typename IndexType = Eigen::DenseIndex> + Eigen::DSizes<IndexType, NDIMS> AsEigenDSizesCopyAndPad() const; + // For access to TensorShapeBase(DataType). friend class Tensor; }; @@ -426,10 +513,21 @@ class PartialTensorShape : public TensorShapeBase<PartialTensorShape> { /// REQUIRES: `size >= -1`, where -1 means unknown PartialTensorShape Concatenate(int64 size) const; + /// Similar to `Concatenate` but returning `Status`. + /// Use if calling code cannot validate all requirements and if `CHECK`-fails + /// are to be avoided. + Status ConcatenateWithStatus(int64 size, PartialTensorShape* out) const; + /// Appends all the dimensions from `shape`. Returns a new /// PartialTensorShape. PartialTensorShape Concatenate(const PartialTensorShape& shape) const; + /// Similar to `Concatenate` but returning `Status`. + /// Use if calling code cannot validate all requirements and if `CHECK`-fails + /// are to be avoided. + Status ConcatenateWithStatus(const PartialTensorShape& shape, + PartialTensorShape* out) const; + /// Merges all the dimensions from `shape`. Returns /// `InvalidArgument` error if either `shape` has a different rank /// or if any of the dimensions are incompatible. 
@@ -481,14 +579,16 @@ class PartialTensorShapeUtils { // ---------------------------------------------------------------------------- template <int NDIMS, typename IndexType> -Eigen::DSizes<IndexType, NDIMS> TensorShape::AsEigenDSizes() const { - CheckDimsEqual(NDIMS); - return AsEigenDSizesWithPadding<NDIMS, IndexType>(); +Eigen::DSizes<IndexType, NDIMS> TensorShape::AsEigenDSizesCopy() const { + Eigen::DSizes<IndexType, NDIMS> dsizes; + for (int d = 0; d < NDIMS; d++) { + dsizes[d] = static_cast<IndexType>(dim_size(d)); + } + return dsizes; } template <int NDIMS, typename IndexType> -Eigen::DSizes<IndexType, NDIMS> TensorShape::AsEigenDSizesWithPadding() const { - CheckDimsAtLeast(NDIMS); +Eigen::DSizes<IndexType, NDIMS> TensorShape::AsEigenDSizesCopyAndPad() const { static_assert(NDIMS <= TensorShape::MaxDimensions(), "Too many dimensions"); Eigen::DSizes<IndexType, NDIMS> dsizes; for (int d = 0; d < dims(); d++) { @@ -500,6 +600,40 @@ Eigen::DSizes<IndexType, NDIMS> TensorShape::AsEigenDSizesWithPadding() const { return dsizes; } +template <int NDIMS, typename IndexType> +Eigen::DSizes<IndexType, NDIMS> TensorShape::AsEigenDSizes() const { + CheckDimsEqual(NDIMS); + return AsEigenDSizesCopy<NDIMS, IndexType>(); +} + +template <int NDIMS, typename IndexType> +Status TensorShape::AsEigenDSizesWithStatus( + Eigen::DSizes<IndexType, NDIMS>* out) const { + if (TF_PREDICT_FALSE(NDIMS != dims())) { + return errors::Internal("Asking for tensor of ", NDIMS, + " dimensions from a tensor of ", dims(), + " dimensions"); + } + *out = AsEigenDSizesCopy<NDIMS, IndexType>(); + return Status::OK(); +} + +template <int NDIMS, typename IndexType> +Eigen::DSizes<IndexType, NDIMS> TensorShape::AsEigenDSizesWithPadding() const { + CheckDimsAtLeast(NDIMS); + return AsEigenDSizesCopyAndPad<NDIMS, IndexType>(); +} + +template <int NDIMS, typename IndexType> +Status TensorShape::AsEigenDSizesWithPaddingWithStatus( + Eigen::DSizes<IndexType, NDIMS>* out) const { + if (TF_PREDICT_FALSE(NDIMS < dims())) { + return errors::Internal("Asking for tensor of at least ", NDIMS, + " dimensions from a tensor of ", dims(), + " dimensions"); + } + *out = AsEigenDSizesCopyAndPad<NDIMS, IndexType>(); + return Status::OK(); +} + // ---------------------------------------------------------------------------- // Inlining of some performance critical routines // ---------------------------------------------------------------------------- diff --git a/tensorflow/core/framework/tensor_shape_test.cc b/tensorflow/core/framework/tensor_shape_test.cc index ea93009ef40705..43f3e4304646ce 100644 --- a/tensorflow/core/framework/tensor_shape_test.cc +++ b/tensorflow/core/framework/tensor_shape_test.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/protobuf/error_codes.pb.h" namespace tensorflow { class TensorShapeTestHelper { @@ -205,6 +206,28 @@ TEST(TensorShapeTest, ostream) { EXPECT_EQ(ss.str(), "[10,5,4]"); } +TEST(TensorShapeTest, AddDimWithStatus) { + TensorShape s({10, 5, 20}); + Status status = s.AddDimWithStatus(400); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(400000, s.num_elements()); + ASSERT_EQ(4, s.dims()); + + status = s.AddDimWithStatus(-1); + EXPECT_EQ(tensorflow::error::INTERNAL, status.code()); +} + +TEST(TensorShapeTest, Factory) { + TensorShape s; + Status status = TensorShape::BuildTensorShapeBase({10, 5, 20}, &s); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(1000, s.num_elements()); + ASSERT_EQ(3, s.dims()); + + status = TensorShape::BuildTensorShapeBase({-10, 5, 20}, &s); + EXPECT_EQ(tensorflow::error::INTERNAL, status.code()); +} + // ----------------------------------------------------------------------- // An old implementation of TensorShape using a different representation, // preserved here in the unittest to allow us to have a randomized unittest diff --git a/tensorflow/core/framework/tensor_slice.cc b/tensorflow/core/framework/tensor_slice.cc index 975e1e2e24a439..7041b011157434 100644 --- a/tensorflow/core/framework/tensor_slice.cc +++ b/tensorflow/core/framework/tensor_slice.cc @@ -14,7 +14,10 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/framework/tensor_slice.h" + +#include #include + #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" @@ -44,6 +47,34 @@ TensorSlice::TensorSlice( } } +Status TensorSlice::BuildTensorSlice(const TensorSliceProto& proto, + TensorSlice* output) { + output->Clear(); + output->starts_.reserve(proto.extent_size()); + output->lengths_.reserve(proto.extent_size()); + for (const auto& e : proto.extent()) { + int64_t l = GetExtentLength(e); + if (e.start() != 0 || l != kFullExtent) { + if (e.start() < 0 || l <= 0) { + return errors::InvalidArgument( + "Expected non-negative start and positive length but got start = ", + e.start(), ", length = ", l, ": extent = ", e.ShortDebugString()); + } + // Calculating the extent end must not cause signed integer overflow. + if (static_cast(e.start()) + static_cast(e.length()) > + std::numeric_limits::max()) { + return errors::InvalidArgument( + "Extent end exceeds the maximum possible size: extent = ", + e.ShortDebugString()); + } + } + output->starts_.push_back(e.start()); + output->lengths_.push_back(l); + } + + return Status::OK(); +} + Status TensorSlice::Parse(const string& str, TensorSlice* slice) { std::vector items = str_util::Split(str, ':', str_util::SkipEmpty()); slice->starts_.reserve(items.size()); diff --git a/tensorflow/core/framework/tensor_slice.h b/tensorflow/core/framework/tensor_slice.h index 82f21fb17eec78..4c2795694564da 100644 --- a/tensorflow/core/framework/tensor_slice.h +++ b/tensorflow/core/framework/tensor_slice.h @@ -47,6 +47,12 @@ class TensorSlice { explicit TensorSlice(const TensorSliceProto& proto); explicit TensorSlice(std::initializer_list> extents); + // This factory methods should be used instead of the constructor that takes a + // `TensorSliceProto` if calling code cannot validate that the sizes specify a + // valid `TensorSlice`. 
+ static Status BuildTensorSlice(const TensorSliceProto& proto, + TensorSlice* output); + static Status Parse(const string& str, TensorSlice* output); static TensorSlice ParseOrDie(const string& str) { TensorSlice ret; diff --git a/tensorflow/core/framework/tensor_slice_test.cc b/tensorflow/core/framework/tensor_slice_test.cc index 54e680484e228b..69b7c7cd084e33 100644 --- a/tensorflow/core/framework/tensor_slice_test.cc +++ b/tensorflow/core/framework/tensor_slice_test.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/core/framework/tensor_slice.h" +#include + #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/protobuf.h" @@ -123,6 +125,48 @@ TEST(TensorSliceTest, Serialization) { } } +// Testing `BuildTensorSlice` with valid and invalid input protos. +TEST(TensorSliceTest, BuildTensorSlice) { + TensorSliceProto proto; + TensorSlice({{0, -1}, {0, 10}, {14, 1}}).AsProto(&proto); + TensorSlice s; + + // Successful building. + { + TF_ASSERT_OK(TensorSlice::BuildTensorSlice(proto, &s)); + EXPECT_EQ("-:0,10:14,1", s.DebugString()); + } + + // Failed building due to negative extent start. + { + TensorSliceProto invalid_proto = proto; + invalid_proto.mutable_extent(0)->set_start(-1); + EXPECT_FALSE(TensorSlice::BuildTensorSlice(invalid_proto, &s).ok()); + } + + // Failed building due to negative extent length. + { + TensorSliceProto invalid_proto = proto; + invalid_proto.mutable_extent(2)->set_length(-1); + EXPECT_FALSE(TensorSlice::BuildTensorSlice(invalid_proto, &s).ok()); + } + + // Failed building due to missing extent length. + { + TensorSliceProto invalid_proto = proto; + invalid_proto.mutable_extent(2)->clear_length(); + EXPECT_FALSE(TensorSlice::BuildTensorSlice(invalid_proto, &s).ok()); + } + + // Failed building due to extent end overflowing. 
+ { + TensorSliceProto invalid_proto = proto; + invalid_proto.mutable_extent(2)->set_length( + std::numeric_limits::max()); + EXPECT_FALSE(TensorSlice::BuildTensorSlice(invalid_proto, &s).ok()); + } +} + // Testing the slice intersection TEST(TensorSliceTest, Intersection) { // "EVERYTHING" intersects with everything diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index f0a15cbce499c2..9f3d431d4c325b 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -2028,6 +2028,12 @@ class ReorderCastLikeAndValuePreserving : public ArithmeticOptimizerStage { Status TrySimplify(NodeDef* consumer, string* simplified_node_name) override { NodeDef* producer; + + if (consumer->input_size() < 1) { + return errors::FailedPrecondition("Node ", simplified_node_name, + " lacks inputs"); + } + TF_RETURN_IF_ERROR(GetInputNode(consumer->input(0), &producer)); const bool producer_is_cast = IsCastLike(*producer); const bool can_optimize = @@ -2430,6 +2436,11 @@ class ReplaceMulWithSquare : public ArithmeticOptimizerStage { ~ReplaceMulWithSquare() override = default; bool IsSupported(const NodeDef* node) const override { + if (!node || node->input_size() < 2) { + // Invalid node + return false; + } + return IsAnyMul(*node) && node->input(0) == node->input(1); } diff --git a/tensorflow/core/grappler/optimizers/auto_parallel.cc b/tensorflow/core/grappler/optimizers/auto_parallel.cc index a537fa256babba..e4e8009f9ccb20 100644 --- a/tensorflow/core/grappler/optimizers/auto_parallel.cc +++ b/tensorflow/core/grappler/optimizers/auto_parallel.cc @@ -152,7 +152,7 @@ Status AutoParallel::Initialize(const GrapplerItem& item) { TF_RETURN_IF_ERROR(ComputeTransitiveFanin(graph_, item.fetch, &train_nodes)); LOG(INFO) << "Number of training nodes: " << train_nodes.size(); - const NodeDef* dequeue_node; + const NodeDef* dequeue_node = nullptr; for (const auto& train_node : train_nodes) { if (IsDequeueOp(*train_node)) { dequeue_node = train_node; diff --git a/tensorflow/core/grappler/optimizers/auto_parallel_test.cc b/tensorflow/core/grappler/optimizers/auto_parallel_test.cc index 1c3186f1ee6e68..3af03a09613883 100644 --- a/tensorflow/core/grappler/optimizers/auto_parallel_test.cc +++ b/tensorflow/core/grappler/optimizers/auto_parallel_test.cc @@ -126,6 +126,30 @@ TEST_F(AutoParallelTest, SimpleParallel) { EXPECT_EQ("^AutoParallel-Control-Fetch", node_gradient.input(0)); } +TEST_F(AutoParallelTest, SimpleParallelNoDequeue) { + tensorflow::Scope s = tensorflow::Scope::DisabledShapeInferenceScope(); + Output constant_a = ops::Const(s.WithOpName("constant_a"), 1.0f, {1}); + Output constant_c = ops::Const(s.WithOpName("constant_c"), 1.0f, {1}); + Output constant_b = ops::Const(s.WithOpName("constant_b"), 1, {1}); + Output var = ops::Variable(s.WithOpName("var"), {1}, DT_FLOAT); + Output assign = ops::Assign(s.WithOpName("assign"), {var}, {constant_a}); + Output add = ops::AddN(s.WithOpName("add"), {constant_a, constant_c}); + Output learning_rate = ops::Const(s.WithOpName("learning_rate"), 0.01f, {1}); + Output apply_gradient = ops::ApplyGradientDescent( + s.WithOpName("apply_gradient"), {var}, {learning_rate}, {add}); + + GrapplerItem item; + item.init_ops.push_back("assign"); + item.fetch.push_back("apply_gradient"); + item.init_ops.push_back("assign"); + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + AutoParallel parallel(2); + GraphDef output; + Status status = 
parallel.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc index 332d2d4e6d9668..1be7f2692e0f76 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -68,6 +68,12 @@ bool DependencyOptimizer::SafeToRemoveIdentity(const NodeDef& node) const { // The output values of this node may be needed. return false; } + + if (node.input_size() < 1) { + // Node lacks input, is invalid + return false; + } + const NodeDef* input = node_map_->GetNode(NodeName(node.input(0))); CHECK(input != nullptr) << "node = " << node.name() << " input = " << node.input(0); diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc index 35911ee5d5540a..6299ca3e3b1d90 100644 --- a/tensorflow/core/kernels/bincount_op.cc +++ b/tensorflow/core/kernels/bincount_op.cc @@ -364,6 +364,16 @@ class SparseBincountOp : public OpKernel { for (int64 i = 0; i < indices_mat.dimension(0); ++i) { const int64 batch = indices_mat(i, 0); const Tidx bin = values(i); + OP_REQUIRES( + ctx, batch < out.dimension(0), + errors::InvalidArgument("Index out of bounds. `batch` (", batch, + ") must be less than the dimension size (", + out.dimension(0), ").")); + OP_REQUIRES( + ctx, bin < out.dimension(1), + errors::InvalidArgument("Index out of bounds. `bin` (", bin, + ") must be less than the dimension size (", + out.dimension(1), ").")); if (bin < size) { if (binary_output_) { out(batch, bin) = T(1); @@ -420,6 +430,15 @@ class RaggedBincountOp : public OpKernel { int num_values = values.size(); int batch_idx = 0; + OP_REQUIRES(ctx, splits(0) == 0, + errors::InvalidArgument("Splits must start with 0, not with ", + splits(0))); + + OP_REQUIRES(ctx, splits(num_rows) == num_values, + errors::InvalidArgument( + "Splits must end with the number of values, got ", + splits(num_rows), " instead of ", num_values)); + Tensor* out_t; OP_REQUIRES_OK( ctx, ctx->allocate_output(0, TensorShape({num_rows, size}), &out_t)); diff --git a/tensorflow/core/kernels/boosted_trees/quantile_ops.cc b/tensorflow/core/kernels/boosted_trees/quantile_ops.cc index 0065bdd66aa708..916db1f436148b 100644 --- a/tensorflow/core/kernels/boosted_trees/quantile_ops.cc +++ b/tensorflow/core/kernels/boosted_trees/quantile_ops.cc @@ -116,6 +116,9 @@ class BoostedTreesCreateQuantileStreamResourceOp : public OpKernel { const Tensor* num_streams_t; OP_REQUIRES_OK(context, context->input(kNumStreamsName, &num_streams_t)); int64 num_streams = num_streams_t->scalar<int64>()(); + OP_REQUIRES(context, num_streams >= 0, + errors::InvalidArgument( + "Num_streams input cannot be a negative integer")); auto result = new QuantileStreamResource(epsilon, max_elements_, num_streams); diff --git a/tensorflow/core/kernels/boosted_trees/resource_ops.cc b/tensorflow/core/kernels/boosted_trees/resource_ops.cc index ac1fb5652da5f9..8036f2b20f36bb 100644 --- a/tensorflow/core/kernels/boosted_trees/resource_ops.cc +++ b/tensorflow/core/kernels/boosted_trees/resource_ops.cc @@ -53,6 +53,7 @@ class BoostedTreesCreateEnsembleOp : public OpKernel { if (!result->InitFromSerialized( tree_ensemble_serialized_t->scalar<tstring>()(), stamp_token)) { result->Unref(); + result.release(); // Needed due to the `->Unref` above, to prevent UAF OP_REQUIRES( context, false, errors::InvalidArgument("Unable to parse tree 
ensemble proto.")); diff --git a/tensorflow/core/kernels/boosted_trees/stats_ops.cc b/tensorflow/core/kernels/boosted_trees/stats_ops.cc index 851e5b78e847b7..bb6709d32d5e2f 100644 --- a/tensorflow/core/kernels/boosted_trees/stats_ops.cc +++ b/tensorflow/core/kernels/boosted_trees/stats_ops.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include <limits> +#include <string> #include <vector> #include "third_party/eigen3/Eigen/Core" @@ -22,6 +23,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/boosted_trees/boosted_trees.pb.h" #include "tensorflow/core/kernels/boosted_trees/tree_helper.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { @@ -51,6 +53,16 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { // node_id_range const Tensor* node_id_range_t; OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t)); + OP_REQUIRES( + context, node_id_range_t->dims() == 1, + errors::InvalidArgument("node_id_range must be a rank 1 tensor, but " + "given node_id_range has dims of ", + node_id_range_t->dims())); + OP_REQUIRES(context, node_id_range_t->dim_size(0) == 2, + errors::InvalidArgument( + "node_id_range must be a rank 1 tensor with shape=[2], but " + "given node_id_range has shape ", + node_id_range_t->dim_size(0), " on its first dim")); const auto node_id_range = node_id_range_t->vec<int32>(); const int32 node_id_first = node_id_range(0); // inclusive const int32 node_id_last = node_id_range(1); // exclusive @@ -60,7 +72,10 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel { &stats_summary_list)); const int64 num_buckets = stats_summary_list[0].dim_size(1); // Check for single logit: 1 gradient + 1 hessian value. 
- DCHECK_EQ(stats_summary_list[0].dim_size(2), 2); + OP_REQUIRES(context, stats_summary_list[0].dim_size(2) == 2, + errors::InvalidArgument("stats_summary_list[0] must have " + "a last dimension of size 2 (1 gradient " + "+ 1 hessian), obtained: ", + stats_summary_list[0].dim_size(2))); std::vector<TTypes<float, 3>::ConstTensor> stats_summary; stats_summary.reserve(stats_summary_list.size()); for (const auto& tensor : stats_summary_list) { @@ -244,12 +259,18 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel { // node_id_range const Tensor* node_id_range_t; OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t)); + OP_REQUIRES( + context, node_id_range_t->NumElements() == 2, + errors::InvalidArgument("node_id_range argument must have shape [2]")); const auto node_id_range = node_id_range_t->vec<int32>(); const int32 node_id_first = node_id_range(0); // inclusive const int32 node_id_last = node_id_range(1); // exclusive const Tensor* stats_summary_t; OP_REQUIRES_OK(context, context->input("stats_summary", &stats_summary_t)); + OP_REQUIRES( + context, stats_summary_t->shape().dims() == 4, + errors::InvalidArgument("stats_summary argument must have rank 4")); TTypes<float, 4>::ConstTensor stats_summary = stats_summary_t->tensor<float, 4>(); const int32 feature_dims = stats_summary_t->dim_size(1); @@ -257,11 +278,18 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel { const int32 num_buckets = stats_summary_t->dim_size(2) - 1; const int32 logits_dim = logits_dim_; const int32 hessian_dim = stats_summary_t->dim_size(3) - logits_dim; - DCHECK_GT(hessian_dim, 0); - DCHECK_LE(hessian_dim, logits_dim * logits_dim); + OP_REQUIRES(context, hessian_dim > 0, + errors::InvalidArgument("hessian dim should be > 0, got ", + hessian_dim)); + OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim, + errors::InvalidArgument( + "hessian dim should be <= ", logits_dim * logits_dim, + " but got: ", hessian_dim)); const Tensor* l1_t; OP_REQUIRES_OK(context, context->input("l1", &l1_t)); + OP_REQUIRES(context, l1_t->NumElements() == 1, + errors::InvalidArgument("l1 argument must be a scalar")); const auto l1 = l1_t->scalar<float>()(); DCHECK_GE(l1, 0); if (logits_dim_ > 1) { @@ -271,17 +299,25 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel { const Tensor* l2_t; OP_REQUIRES_OK(context, context->input("l2", &l2_t)); + OP_REQUIRES(context, l2_t->NumElements() == 1, + errors::InvalidArgument("l2 argument must be a scalar")); const auto l2 = l2_t->scalar<float>()(); DCHECK_GE(l2, 0); const Tensor* tree_complexity_t; OP_REQUIRES_OK(context, context->input("tree_complexity", &tree_complexity_t)); + OP_REQUIRES( + context, tree_complexity_t->NumElements() == 1, + errors::InvalidArgument("tree_complexity argument must be a scalar")); const auto tree_complexity = tree_complexity_t->scalar<float>()(); const Tensor* min_node_weight_t; OP_REQUIRES_OK(context, context->input("min_node_weight", &min_node_weight_t)); + OP_REQUIRES( + context, min_node_weight_t->NumElements() == 1, + errors::InvalidArgument("min_node_weight argument must be a scalar")); const auto min_node_weight = min_node_weight_t->scalar<float>()(); std::vector<int32> output_node_ids; @@ -290,7 +326,7 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel { std::vector<int32> output_thresholds; std::vector<Eigen::VectorXf> output_left_node_contribs; std::vector<Eigen::VectorXf> output_right_node_contribs; - std::vector<string> output_split_types; + std::vector<std::string> output_split_types; // TODO(tanzheny) parallelize the computation. // Iterate each node and find the best gain per node. 
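The recurring hardening pattern in the hunks above and below: a debug-only DCHECK, which compiles to nothing in release builds and so lets malformed inputs reach the computation, is replaced by an OP_REQUIRES check that rejects the input with errors::InvalidArgument in every build mode. A minimal sketch of the pattern, using a hypothetical kernel and input name rather than one from this patch:

    // Inside some OpKernel::Compute(OpKernelContext* context):
    const Tensor* t;
    OP_REQUIRES_OK(context, context->input("some_input", &t));
    // Before: DCHECK_EQ(t->dims(), 2);  -- a no-op in optimized builds.
    // After: validated in all builds; on failure the kernel records the
    // error Status and returns instead of touching out-of-bounds memory.
    OP_REQUIRES(context, t->dims() == 2,
                errors::InvalidArgument("some_input must be rank 2, got ",
                                        t->dims()));

OP_REQUIRES evaluates its condition and, when it fails, calls CtxFailure with the given Status and returns from the enclosing function, so later code that assumes the validated shape never runs.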
@@ -567,6 +603,16 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel { // node_id_range const Tensor* node_id_range_t; OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t)); + OP_REQUIRES( + context, node_id_range_t->dims() == 1, + errors::InvalidArgument("node_id_range must be a rank 1 tensor, but " + "given node_id_range has dims of ", + node_id_range_t->dims())); + OP_REQUIRES(context, node_id_range_t->dim_size(0) == 2, + errors::InvalidArgument( + "node_id_range must be a rank 1 tensor with shape=[2], but " + "given node_id_range has shape ", + node_id_range_t->dim_size(0), " on its first dim")); const auto node_id_range = node_id_range_t->vec<int32>(); const int32 node_id_first = node_id_range(0); // Inclusive. const int32 node_id_last = node_id_range(1); // Exclusive. @@ -583,8 +629,13 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel { const int32 num_buckets = stats_summaries_list[0].dim_size(2) - 1; const int32 logits_dim = logits_dim_; const int32 hessian_dim = stats_summaries_list[0].dim_size(3) - logits_dim; - DCHECK_GT(hessian_dim, 0); - DCHECK_LE(hessian_dim, logits_dim * logits_dim); + OP_REQUIRES(context, hessian_dim > 0, + errors::InvalidArgument("hessian dim should be > 0, got ", + hessian_dim)); + OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim, + errors::InvalidArgument( + "hessian dim should be <= ", logits_dim * logits_dim, + " but got: ", hessian_dim)); // Vector of stats_summaries; each element is stats for feature of shape // [max_splits, feature_dim, num_buckets, logits_dim + hessian_dim]. @@ -959,6 +1010,10 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel { const Tensor* node_id_range_t; OP_REQUIRES_OK(context, context->input("node_id_range", &node_id_range_t)); const auto node_id_range = node_id_range_t->vec<int32>(); + OP_REQUIRES( + context, node_id_range.size() == 2, + errors::InvalidArgument("node_id_range should have 2 entries, got: ", + node_id_range.size())); const int32 node_id_first = node_id_range(0); // inclusive const int32 node_id_last = node_id_range(1); // exclusive @@ -1025,6 +1080,18 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel { const int32 feature_dim = stats_summary_indices(idx, 1); const int32 bucket_id = stats_summary_indices(idx, 2); const int32 stat_dim = stats_summary_indices(idx, 3); + OP_REQUIRES(context, stat_dim < stats_dims, + errors::InvalidArgument( + "Stat dim, the sum of logits dim and hessian dim in " + "stats_summary_indices, cannot be greater than stats " + "dims, the last value in stats_summary_shape, which was ", + stats_dims, ". At index (", idx, + ", 4), stats_summary_indices contains value ", stat_dim)); + OP_REQUIRES(context, stat_dim >= 0, + errors::InvalidArgument( + "Stat dim, the sum of logits dim and hessian dim in " + "stats_summary_indices, should be >= 0, which was ", + stat_dim, " at index ", idx)); std::pair<FeatureMapIterator, bool> const& f_insert_result = f_map.insert( FeatureMapIterator::value_type(feature_dim, BucketMap())); auto& b_map = f_insert_result.first->second; @@ -1257,6 +1324,12 @@ class BoostedTreesMakeStatsSummaryOp : public OpKernel { const Tensor* gradients_t; OP_REQUIRES_OK(context, context->input("gradients", &gradients_t)); const auto gradients = gradients_t->matrix<float>(); + OP_REQUIRES( + context, node_ids.size() == gradients.dimension(0), + errors::InvalidArgument( + "node_ids size should match 0th dim of gradients. 
node ids " + "size: ", + node_ids.size(), ", gradients dim0: ", gradients.dimension(0))); // hessians const Tensor* hessians_t; OP_REQUIRES_OK(context, context->input("hessians", &hessians_t)); @@ -1326,6 +1399,13 @@ class BoostedTreesAggregateStatsOp : public OpKernel { OP_REQUIRES_OK(context, context->input("gradients", &gradients_t)); const auto gradients = gradients_t->matrix<float>(); + OP_REQUIRES( + context, node_ids.size() == gradients.dimension(0), + errors::InvalidArgument( + "node_ids size should match 0th dim of gradients. node ids " + "size: ", + node_ids.size(), ", gradients dim0: ", gradients.dimension(0))); + // hessians. const Tensor* hessians_t; OP_REQUIRES_OK(context, context->input("hessians", &hessians_t)); @@ -1356,6 +1436,9 @@ class BoostedTreesAggregateStatsOp : public OpKernel { for (int i = 0; i < batch_size; ++i) { const int32 node = node_ids(i); + OP_REQUIRES(context, node >= 0, + errors::InvalidArgument( + "node_ids ", i, "th entry should be >= 0, got: ", node)); for (int feature_dim = 0; feature_dim < feature_dims; ++feature_dim) { const int32 feature_value = feature(i, feature_dim); const int32 bucket = @@ -1563,7 +1646,12 @@ class BoostedTreesSparseAggregateStatsOp : public OpKernel { const int64 stats_dims = logits_dims + hessians_dims; const int64 num_sparse_entries = feature_indices_t->dim_size(0); const int32 feature_dims = feature_shape(1); - DCHECK_LE(num_sparse_entries, batch_size * feature_dims); + OP_REQUIRES(context, num_sparse_entries <= batch_size * feature_dims, + errors::InvalidArgument( + "feature_indices dim0 should be <= gradients dim0 * " + "feature_shape[1]. feature_indices dim0: ", + num_sparse_entries, " gradients dim0: ", batch_size, + ", feature_shape[1]: ", feature_dims)); // Aggregate statistics info to map. StatsPartitionMap stats_map; diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc index a49f6b5f1949fc..96c9e767f6652c 100644 --- a/tensorflow/core/kernels/conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc @@ -495,6 +495,14 @@ class Conv2DCustomBackpropFilterOp : public OpKernel { const int filter_total_size = dims.spatial_dims[0].filter_size * dims.spatial_dims[1].filter_size * dims.in_depth; + OP_REQUIRES( + context, + filter_total_size * dims.out_depth == filter_backprop->NumElements(), + errors::InvalidArgument( + "filter_size does not have enough elements, requested ", + filter_total_size * dims.out_depth, ", got ", + filter_backprop->NumElements())); + // The output image size is the spatial size of the output. 
const int output_image_size = dims.spatial_dims[0].output_size * dims.spatial_dims[1].output_size; @@ -518,6 +526,11 @@ class Conv2DCustomBackpropFilterOp : public OpKernel { const size_t work_unit_size = size_A + size_B + size_C; + OP_REQUIRES( + context, work_unit_size != 0, + errors::InvalidArgument( + "Work size for convolution would be 0, which is not acceptable")); + const size_t shard_size = (target_working_set_size + work_unit_size - 1) / work_unit_size; diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc index 2605f7b02f1ed4..6a6b8bb5883942 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/conv_grad_input_ops.cc @@ -677,6 +677,11 @@ class Conv2DCustomBackpropInputOp : public OpKernel { dims.batch_size == 1 || thread_work_unit_size >= min_thread_work_unit_size; + OP_REQUIRES( + context, work_unit_size > 0, + errors::InvalidArgument("input, filter_sizes and out_backprop tensors " + "must all have at least 1 element")); + const size_t shard_size = use_parallel_contraction ? 1 diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index ee40cd537d7ef8..c7101724c52bec 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -239,6 +239,28 @@ class Conv3DBackpropInputOp : public OpKernel { input_shape = context->input(0).shape(); } + OP_REQUIRES(context, input_shape.dims() == 5, + errors::InvalidArgument("input tensor must have 5 dimensions")); + OP_REQUIRES( + context, filter_shape.dims() == 5, + errors::InvalidArgument("filter_sizes tensor must have 5 dimensions")); + OP_REQUIRES( + context, out_backprop_shape.dims() == 5, + errors::InvalidArgument("out_backprop tensor must have 5 dimensions")); + OP_REQUIRES( + context, input_shape.dim_size(4) == filter_shape.dim_size(3), + errors::InvalidArgument("input and filter_sizes must have the same " + "number of channels. Got ", + input_shape.dim_size(4), " for input and ", + filter_shape.dim_size(3), " for filter_sizes")); + OP_REQUIRES( + context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4), + errors::InvalidArgument("out_backprop and filter_sizes must have the " + "same number of channels. Got ", + out_backprop_shape.dim_size(4), + " for out_backprop and ", + filter_shape.dim_size(4), " for filter_sizes")); + ConvBackpropDimensions dims; OP_REQUIRES_OK(context, ConvBackpropComputeDimensions( "Conv3DBackpropInputOp", /*num_spatial_dims=*/3, @@ -346,6 +368,28 @@ class Conv3DCustomBackpropInputOp : public OpKernel { input_shape = context->input(0).shape(); } + OP_REQUIRES(context, input_shape.dims() == 5, + errors::InvalidArgument("input tensor must have 5 dimensions")); + OP_REQUIRES( + context, filter_shape.dims() == 5, + errors::InvalidArgument("filter_sizes tensor must have 5 dimensions")); + OP_REQUIRES( + context, out_backprop_shape.dims() == 5, + errors::InvalidArgument("out_backprop tensor must have 5 dimensions")); + OP_REQUIRES( + context, input_shape.dim_size(4) == filter_shape.dim_size(3), + errors::InvalidArgument("input and filter_sizes must have the same " + "number of channels. Got ", + input_shape.dim_size(4), " for input and ", + filter_shape.dim_size(3), " for filter_sizes")); + OP_REQUIRES( + context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4), + errors::InvalidArgument("out_backprop and filter_sizes must have the " + "same number of channels. 
Got ", + out_backprop_shape.dim_size(4), + " for out_backprop and ", + filter_shape.dim_size(4), " for filter_sizes")); + ConvBackpropDimensions dims; OP_REQUIRES_OK(context, ConvBackpropComputeDimensions( "Conv3DBackpropInputOp", /*num_spatial_dims=*/3, @@ -416,6 +460,11 @@ class Conv3DCustomBackpropInputOp : public OpKernel { // contraction compared to sharding and matmuls. const bool use_parallel_contraction = dims.batch_size == 1; + OP_REQUIRES( + context, work_unit_size > 0, + errors::InvalidArgument("input, filter_sizes and out_backprop tensors " + "must all have at least 1 element")); + const size_t shard_size = use_parallel_contraction ? 1 @@ -696,6 +745,28 @@ class Conv3DBackpropFilterOp : public OpKernel { filter_shape = context->input(1).shape(); } + OP_REQUIRES(context, input_shape.dims() == 5, + errors::InvalidArgument("input tensor must have 5 dimensions")); + OP_REQUIRES( + context, filter_shape.dims() == 5, + errors::InvalidArgument("filter_sizes tensor must have 5 dimensions")); + OP_REQUIRES( + context, out_backprop_shape.dims() == 5, + errors::InvalidArgument("out_backprop tensor must have 5 dimensions")); + OP_REQUIRES( + context, input_shape.dim_size(4) == filter_shape.dim_size(3), + errors::InvalidArgument("input and filter_sizes must have the same " + "number of channels. Got ", + input_shape.dim_size(4), " for input and ", + filter_shape.dim_size(3), " for filter_sizes")); + OP_REQUIRES( + context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4), + errors::InvalidArgument("out_backprop and filter_sizes must have the " + "same number of channels. Got ", + out_backprop_shape.dim_size(4), + " for out_backprop and ", + filter_shape.dim_size(4), " for filter_sizes")); + ConvBackpropDimensions dims; OP_REQUIRES_OK(context, ConvBackpropComputeDimensions( @@ -808,6 +879,28 @@ class Conv3DCustomBackpropFilterOp : public OpKernel { filter_shape = context->input(1).shape(); } + OP_REQUIRES(context, input_shape.dims() == 5, + errors::InvalidArgument("input tensor must have 5 dimensions")); + OP_REQUIRES( + context, filter_shape.dims() == 5, + errors::InvalidArgument("filter_sizes tensor must have 5 dimensions")); + OP_REQUIRES( + context, out_backprop_shape.dims() == 5, + errors::InvalidArgument("out_backprop tensor must have 5 dimensions")); + OP_REQUIRES( + context, input_shape.dim_size(4) == filter_shape.dim_size(3), + errors::InvalidArgument("input and filter_sizes must have the same " + "number of channels. Got ", + input_shape.dim_size(4), " for input and ", + filter_shape.dim_size(3), " for filter_sizes")); + OP_REQUIRES( + context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4), + errors::InvalidArgument("out_backprop and filter_sizes must have the " + "same number of channels. 
Got ", + out_backprop_shape.dim_size(4), + " for out_backprop and ", + filter_shape.dim_size(4), " for filter_sizes")); + ConvBackpropDimensions dims; OP_REQUIRES_OK(context, ConvBackpropComputeDimensions( @@ -880,6 +973,11 @@ class Conv3DCustomBackpropFilterOp : public OpKernel { const int64 work_unit_size = size_A + size_B + size_C; + OP_REQUIRES( + context, work_unit_size > 0, + errors::InvalidArgument("input, filter_sizes and out_backprop tensors " + "must all have at least 1 element")); + const size_t shard_size = (target_working_set_size + work_unit_size - 1) / work_unit_size; diff --git a/tensorflow/core/kernels/conv_grad_shape_utils.cc b/tensorflow/core/kernels/conv_grad_shape_utils.cc index bba989b4f9230c..7ae3f8b7631add 100644 --- a/tensorflow/core/kernels/conv_grad_shape_utils.cc +++ b/tensorflow/core/kernels/conv_grad_shape_utils.cc @@ -127,6 +127,10 @@ Status ConvBackpropComputeDimensionsV2( // dimensions of the filter Tensor. VLOG(2) << "input vs filter_in depth " << dims->in_depth << " " << filter_shape.dim_size(num_dims - 2); + if (filter_shape.dim_size(num_dims - 2) <= 0) { + return errors::InvalidArgument( + label, ": filter depth must be strictly greater than zero"); + } if (dims->in_depth % filter_shape.dim_size(num_dims - 2)) { return errors::InvalidArgument( label, ": input depth must be evenly divisible by filter depth"); diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index 025a8e37a94e91..1e00e2c27cd7af 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -201,6 +201,10 @@ struct LaunchConv2DOp<CPUDevice, T> { "attempted to be run because the input depth of ", in_depth, " does not match the filter input depth of ", filter.dim_size(2))); + OP_REQUIRES( + ctx, filter.NumElements() > 0, + errors::InvalidArgument("filter must not have zero elements " + "(i.e. all dimensions must be non-zero)")); for (int64 explicit_padding : explicit_paddings) { if (!FastBoundsCheck(explicit_padding, std::numeric_limits<int>::max())) { @@ -425,6 +429,9 @@ Status ComputeConv2DDimension(const Conv2DParameters& params, errors::InvalidArgument("Patch depth too large")); const int in_depth = static_cast<int>(in_depth_raw); const int patch_depth = static_cast<int>(patch_depth_raw); + TF_REQUIRES(patch_depth > 0, + errors::InvalidArgument( + "filter depth must be strictly positive, got ", patch_depth)); TF_REQUIRES(in_depth % patch_depth == 0, errors::InvalidArgument( "input depth must be evenly divisible by filter depth: ", @@ -671,6 +678,11 @@ void LaunchConv2DOp<CPUDevice, T>::operator()( const int64 patch_rows = filter.dim_size(0); const int64 patch_cols = filter.dim_size(1); const int64 patch_depths = filter.dim_size(2); + OP_REQUIRES( + ctx, filter.NumElements() > 0, + errors::InvalidArgument("filter must not have zero elements " + "(i.e. all dimensions must be non-zero)")); + // If the filter in-depth (patch_depths) is 1 and smaller than the input // depth, it's a depthwise convolution. More generally, if the filter in-depth // divides but is smaller than the input depth, it is a grouped convolution. 
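The conv_ops.cc checks above exist because a zero-size filter makes patch_depth 0, and the divisibility test in_depth % patch_depth that follows is then an integer division by zero (the FPE class of issues listed in the release notes for zero-size filter convolutions). A self-contained sketch of why the guard must run first, using made-up values rather than the kernel's real inputs:

    #include <cstdio>

    int main() {
      const int in_depth = 8;
      const int patch_depth = 0;  // what a zero-element filter yields
      // Guard first: modulo by zero is undefined behavior (SIGFPE on
      // most platforms), so reject before the divisibility check.
      if (patch_depth <= 0) {
        std::printf("InvalidArgument: filter depth must be strictly positive\n");
        return 1;
      }
      if (in_depth % patch_depth != 0) {  // safe: patch_depth > 0 here
        std::printf("InvalidArgument: input depth not divisible by filter depth\n");
        return 1;
      }
      return 0;
    }

The same ordering appears in conv_grad_shape_utils.cc above: the new filter-depth check is inserted immediately before the existing in_depth % filter_shape.dim_size(num_dims - 2) test.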
diff --git a/tensorflow/core/kernels/conv_ops_3d.h b/tensorflow/core/kernels/conv_ops_3d.h index 9dcdea5b18f10b..8073ca5a9dfdce 100644 --- a/tensorflow/core/kernels/conv_ops_3d.h +++ b/tensorflow/core/kernels/conv_ops_3d.h @@ -56,6 +56,11 @@ struct LaunchConvOp<CPUDevice, T> { errors::InvalidArgument("CPU implementation of Conv3D " "currently only supports dilated rates " "of 1.")); + OP_REQUIRES(context, filter.dim_size(3) == input.dim_size(input.dims() - 1), + errors::InvalidArgument( + "Number of channels in filter (", filter.dim_size(3), + ") must match last dimension of input (", + input.dim_size(input.dims() - 1), ")")); functor::CuboidConvolution<CPUDevice, T>()( context->template eigen_device<CPUDevice>(), output->tensor<T, 5>(), input.tensor<T, 5>(), filter.tensor<T, 5>(), strides[2], strides[1], @@ -135,6 +140,8 @@ class Conv3DOp : public BinaryOpBase<T> { const int64 filter_depth = filter.dim_size(3); const int64 out_depth = filter.dim_size(4); + OP_REQUIRES(context, filter_depth != 0, + errors::InvalidArgument("filter_depth must be non-zero")); OP_REQUIRES(context, in_depth % filter_depth == 0, errors::InvalidArgument( "Input depth must be evenly divisible by filter depth: ", diff --git a/tensorflow/core/kernels/count_ops.cc b/tensorflow/core/kernels/count_ops.cc index 087deef0812f00..40aa1fe458c1ee 100644 --- a/tensorflow/core/kernels/count_ops.cc +++ b/tensorflow/core/kernels/count_ops.cc @@ -122,6 +122,9 @@ class DenseCount : public OpKernel { int num_batch_elements = 1; for (int i = 0; i < num_batch_dimensions; ++i) { + OP_REQUIRES(context, data.shape().dim_size(i) != 0, + errors::InvalidArgument( + "Invalid input: Shapes dimension cannot be 0.")); num_batch_elements *= data.shape().dim_size(i); } int num_value_elements = data.shape().num_elements() / num_batch_elements; @@ -192,10 +195,22 @@ class SparseCount : public OpKernel { "; values shape: ", values.shape().DebugString())); } + OP_REQUIRES(context, shape.NumElements() != 0, + errors::InvalidArgument( + "The shape argument requires at least one element.")); + bool is_1d = shape.NumElements() == 1; - int num_batches = is_1d ? 1 : shape.flat<int64>()(0); + auto shape_vector = shape.flat<int64>(); + int num_batches = is_1d ? 1 : shape_vector(0); int num_values = values.NumElements(); + for (int b = 0; b < shape_vector.size(); b++) { + OP_REQUIRES(context, shape_vector(b) >= 0, + errors::InvalidArgument( + "Elements in dense_shape must be >= 0. Instead got: ", + shape.DebugString())); + } + OP_REQUIRES(context, num_values == indices.shape().dim_size(0), errors::InvalidArgument( "Number of values must match first dimension of indices.", @@ -212,6 +227,14 @@ class SparseCount : public OpKernel { for (int idx = 0; idx < num_values; ++idx) { int batch = is_1d ? 
0 : indices_values(idx, 0); + if (batch >= num_batches) { + OP_REQUIRES(context, batch < num_batches, + errors::InvalidArgument( + "Indices value along the first dimension must be " + "lower than the first dimension of the shape. Got ", + batch, " as batch and ", num_batches, + " as the first dimension of the shape.")); + } const auto& value = values_values(idx); if (value >= 0 && (maxlength_ <= 0 || value < maxlength_)) { if (binary_output_) { diff --git a/tensorflow/core/kernels/ctc_decoder_ops.cc b/tensorflow/core/kernels/ctc_decoder_ops.cc index d62aef2d03b988..9efdac60e369c2 100644 --- a/tensorflow/core/kernels/ctc_decoder_ops.cc +++ b/tensorflow/core/kernels/ctc_decoder_ops.cc @@ -70,6 +70,9 @@ class CTCDecodeHelper { if (inputs_shape.dims() != 3) { return errors::InvalidArgument("inputs is not a 3-Tensor"); } + if (inputs_shape.num_elements() == 0) { + return errors::InvalidArgument("inputs must not be empty"); + } const int64 max_time = inputs_shape.dim_size(0); const int64 batch_size = inputs_shape.dim_size(1); @@ -232,6 +235,8 @@ class CTCGreedyDecoderOp : public OpKernel { int prev_indices = -1; for (int t = 0; t < seq_len_t(b); ++t) { int max_class_indices; + OP_REQUIRES(ctx, input_list_t[t].dimension(1) > 0, + errors::InvalidArgument("Invalid input dimensions.")); log_prob_t(b, 0) += -RowMax(input_list_t[t], b, &max_class_indices); if (max_class_indices != blank_index && diff --git a/tensorflow/core/kernels/ctc_loss_op.cc b/tensorflow/core/kernels/ctc_loss_op.cc index 6358e82fdda853..ca505e1db93145 100644 --- a/tensorflow/core/kernels/ctc_loss_op.cc +++ b/tensorflow/core/kernels/ctc_loss_op.cc @@ -100,11 +100,18 @@ class CTCLossOp : public OpKernel { errors::InvalidArgument("sequence_length is not a vector")); OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(labels_indices->shape()), errors::InvalidArgument("labels_indices is not a matrix")); + OP_REQUIRES(ctx, labels_indices->dim_size(1) > 1, + errors::InvalidArgument( + "labels_indices second dimension must be > 1. 
Received ", + labels_indices->dim_size(1))); OP_REQUIRES(ctx, TensorShapeUtils::IsVector(labels_values->shape()), errors::InvalidArgument("labels_values is not a vector")); const TensorShape& inputs_shape = inputs->shape(); const int64 max_time = inputs_shape.dim_size(0); + OP_REQUIRES(ctx, max_time != 0, + errors::InvalidArgument( + "Max time or first dimension of input cannot be 0.")); const int64 batch_size = inputs_shape.dim_size(1); const int64 num_classes_raw = inputs_shape.dim_size(2); OP_REQUIRES( diff --git a/tensorflow/core/kernels/cwise_ops_common.h b/tensorflow/core/kernels/cwise_ops_common.h index 9adc628421d046..4f2c83322ba00f 100644 --- a/tensorflow/core/kernels/cwise_ops_common.h +++ b/tensorflow/core/kernels/cwise_ops_common.h @@ -265,6 +265,11 @@ class SimpleBinaryOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& in0 = ctx->input(0); const Tensor& in1 = ctx->input(1); + OP_REQUIRES( + ctx, in0.NumElements() == in1.NumElements(), + errors::InvalidArgument("The two arguments to a cwise op must have " + "the same number of elements, got ", + in0.NumElements(), " and ", in1.NumElements())); auto in0_flat = in0.flat<Tin>(); auto in1_flat = in1.flat<Tin>(); const Device& eigen_device = ctx->eigen_device<Device>(); diff --git a/tensorflow/core/kernels/data/experimental/compression_ops.cc b/tensorflow/core/kernels/data/experimental/compression_ops.cc index efa7018acb6293..8cc214671bd742 100644 --- a/tensorflow/core/kernels/data/experimental/compression_ops.cc +++ b/tensorflow/core/kernels/data/experimental/compression_ops.cc @@ -48,6 +48,11 @@ void UncompressElementOp::Compute(OpKernelContext* ctx) { Tensor tensor = ctx->input(0); const Variant& variant = tensor.scalar<Variant>()(); const CompressedElement* compressed = variant.get<CompressedElement>(); + OP_REQUIRES( + ctx, compressed != nullptr, + errors::InvalidArgument( + "Input does not contain a compressed element. 
Instead got tensor ", + tensor.DebugString())); std::vector<Tensor> components; OP_REQUIRES_OK(ctx, UncompressElement(*compressed, &components)); diff --git a/tensorflow/core/kernels/data/experimental/io_ops.cc b/tensorflow/core/kernels/data/experimental/io_ops.cc index 903088ff481390..6783e5f6cd0f3a 100644 --- a/tensorflow/core/kernels/data/experimental/io_ops.cc +++ b/tensorflow/core/kernels/data/experimental/io_ops.cc @@ -253,7 +253,11 @@ class LoadDatasetOp::Dataset : public DatasetBase { explicit Iterator(const Params& params) : DatasetIterator<Dataset>(params) {} - ~Iterator() override { input_->Unref(); } + ~Iterator() override { + if (input_) { + input_->Unref(); + } + } Status Initialize(IteratorContext* ctx) override { mutex_lock l(mu_); @@ -330,7 +334,7 @@ class LoadDatasetOp::Dataset : public DatasetBase { } mutex mu_; - DatasetBase* input_ TF_GUARDED_BY(mu_); + DatasetBase* input_ TF_GUARDED_BY(mu_) = nullptr; std::unique_ptr<IteratorBase> input_impl_ TF_GUARDED_BY(mu_); std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func_; }; diff --git a/tensorflow/core/kernels/data/experimental/snapshot_dataset_op.cc b/tensorflow/core/kernels/data/experimental/snapshot_dataset_op.cc index 01044957857106..2cb5c69584faf3 100644 --- a/tensorflow/core/kernels/data/experimental/snapshot_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/snapshot_dataset_op.cc @@ -191,8 +191,6 @@ class SnapshotDatasetV2Op::Dataset::Iterator::Reader explicit Reader(const Params& params, int64 start_index); - ~Reader() override; - Status Initialize(IteratorContext* ctx) override; Status GetNextInternal(IteratorContext* ctx, std::vector<Tensor>* out_tensors, @@ -212,7 +210,7 @@ class SnapshotDatasetV2Op::Dataset::Iterator::Reader std::unique_ptr<IteratorBase> input_impl_ TF_GUARDED_BY(mu_); - DatasetBase* input_ TF_GUARDED_BY(mu_); + DatasetBase* input_ TF_GUARDED_BY(mu_) = nullptr; std::unique_ptr<InstantiatedCapturedFunction> instantiated_reader_func_ TF_GUARDED_BY(mu_); @@ -451,7 +449,11 @@ Status SnapshotDatasetV2Op::Dataset::Iterator::GetNextInternal( bool* end_of_sequence) { mutex_lock l(mu_); if (iterator_ == nullptr) { - TF_RETURN_IF_ERROR(InitializeIterator(ctx, nullptr)); + Status s = InitializeIterator(ctx, /*reader=*/nullptr); + if (!s.ok()) { + iterator_.reset(); + return s; + } } index_++; return iterator_->GetNext(ctx, out_tensors, end_of_sequence); @@ -530,8 +532,6 @@ SnapshotDatasetV2Op::Dataset::Iterator::Reader::Reader(const Params& params, int64 start_index) : DatasetIterator<Dataset>(params), start_index_(start_index) {} -SnapshotDatasetV2Op::Dataset::Iterator::Reader::~Reader() { input_->Unref(); } Status SnapshotDatasetV2Op::Dataset::Iterator::Reader::Initialize( IteratorContext* ctx) { mutex_lock l(mu_); @@ -578,11 +578,6 @@ Status SnapshotDatasetV2Op::Dataset::Iterator::Reader::Initialize( "reader_func returns more than one argument."); } TF_RETURN_IF_ERROR(GetDatasetFromVariantTensor(reader_output[0], &input_)); - - // We need to take a reference here as we will use the input_ and - // its iterator. - input_->Ref(); - return input_->MakeIterator(ctx, this, prefix(), &input_impl_); } diff --git a/tensorflow/core/kernels/data/experimental/to_tf_record_op.cc b/tensorflow/core/kernels/data/experimental/to_tf_record_op.cc index bfa894cd473b40..56401bb91f5753 100644 --- a/tensorflow/core/kernels/data/experimental/to_tf_record_op.cc +++ b/tensorflow/core/kernels/data/experimental/to_tf_record_op.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/core/framework/function_handle_cache.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/data/dataset_utils.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/threadpool.h" @@ -87,8 +88,20 @@ class ToTFRecordOp : public AsyncOpKernel { TF_RETURN_IF_ERROR(dataset->MakeIterator( &iter_ctx, /*parent=*/nullptr, "ToTFRecordOpIterator", &iterator)); + const int num_output_dtypes = dataset->output_dtypes().size(); + if (num_output_dtypes != 1) { + return errors::InvalidArgument( + "ToTFRecordOp currently only supports datasets with a single column, ", + "but got ", num_output_dtypes); + } + const DataType dt = dataset->output_dtypes()[0]; + if (dt != DT_STRING) { + return errors::InvalidArgument( + "ToTFRecordOp currently only supports DT_STRING datatypes, but got ", + DataTypeString(dt)); + } std::vector<Tensor> components; - components.reserve(dataset->output_dtypes().size()); + components.reserve(num_output_dtypes); bool end_of_sequence; do { TF_RETURN_IF_ERROR( diff --git a/tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc b/tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc index 9efc9fddf58981..7cd31cc6188ea3 100644 --- a/tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc +++ b/tensorflow/core/kernels/data/sparse_tensor_slice_dataset_op.cc @@ -241,6 +241,17 @@ class SparseTensorSliceDatasetOp : public DatasetOpKernel { errors::InvalidArgument( "Input indices should be a matrix but received shape ", indices->shape().DebugString())); + + const auto num_indices = indices->NumElements(); + const auto num_values = values->NumElements(); + if (num_indices == 0 || num_values == 0) { + OP_REQUIRES(ctx, num_indices == num_values, + errors::InvalidArgument( + "If indices or values are empty, the other one must also " + "be. Got indices of shape ", + indices->shape().DebugString(), " and values of shape ", + values->shape().DebugString())); + } OP_REQUIRES(ctx, TensorShapeUtils::IsVector(values->shape()), errors::InvalidArgument( "Input values should be a vector but received shape ", diff --git a/tensorflow/core/kernels/decode_padded_raw_op.cc b/tensorflow/core/kernels/decode_padded_raw_op.cc index 12e8ec6aff0d41..ca7c7104b442d2 100644 --- a/tensorflow/core/kernels/decode_padded_raw_op.cc +++ b/tensorflow/core/kernels/decode_padded_raw_op.cc @@ -83,14 +83,13 @@ class DecodePaddedRawOp : public OpKernel { // can copy the memory directly. if (!convert_data_endianness_ || sizeof(T) == 1) { for (int64 i = 0; i < flat_in.size(); ++i) { - const T* in_data = reinterpret_cast<const T*>(flat_in(i).data()); - - if (flat_in(i).size() > fixed_length) { - memcpy(out_data, in_data, fixed_length); - } else { - memcpy(out_data, in_data, flat_in(i).size()); - } - out_data += fixed_length; + const auto to_copy = + std::min(flat_in(i).size(), static_cast<size_t>(fixed_length)); + memcpy(out_data, flat_in(i).data(), to_copy); + // Note: increase out_data by width since it's already of type T* so + // each shift amount is implicitly multiplied by sizeof(T) according to + // pointer arithmetic rules. 
+ out_data += width; } } else { // Otherwise, the data is not in the host's byte order, and rather than a @@ -105,7 +104,10 @@ class DecodePaddedRawOp : public OpKernel { p_in += sizeof(T), p_out += sizeof(T)) { std::reverse_copy(p_in, p_in + sizeof(T), p_out); } - out_data += fixed_length; + // Note: increase out_data by width since it's already of type T* so + // each shift amount is implicitly multiplied by sizeof(T) according to + // pointer arithmetic rules. + out_data += width; } } } diff --git a/tensorflow/core/kernels/dequantize_op.cc b/tensorflow/core/kernels/dequantize_op.cc index 5393a677db242a..7a90e0c340b093 100644 --- a/tensorflow/core/kernels/dequantize_op.cc +++ b/tensorflow/core/kernels/dequantize_op.cc @@ -98,6 +98,18 @@ class DequantizeOp : public OpKernel { if (axis_ > -1) { num_slices = input.dim_size(axis_); } + OP_REQUIRES(ctx, input_min_tensor.NumElements() == num_slices, + errors::InvalidArgument( + "input_min_tensor must have as many elements as input on " + "the dequantization axis (", + axis_, "), got ", input_min_tensor.NumElements(), + ", expected ", num_slices)); + OP_REQUIRES(ctx, input_max_tensor.NumElements() == num_slices, + errors::InvalidArgument( + "input_max_tensor must have as many elements as input on " + "the dequantization axis (", + axis_, "), got ", input_max_tensor.NumElements(), + ", expected ", num_slices)); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); diff --git a/tensorflow/core/kernels/dilation_ops.cc b/tensorflow/core/kernels/dilation_ops.cc index 738ea31d555d5f..996ddb62bfefeb 100644 --- a/tensorflow/core/kernels/dilation_ops.cc +++ b/tensorflow/core/kernels/dilation_ops.cc @@ -130,6 +130,7 @@ class DilationOp : public OpKernel { ParseSizes(context, strides_, rates_, padding_, &stride_rows, &stride_cols, &rate_rows, &rate_cols, &pad_top, &pad_left, &out_rows, &out_cols); + if (!context->status().ok()) return; // Output tensor is of the following dimensions: // [ batch, out_rows, out_cols, depth ] @@ -229,6 +230,7 @@ class DilationBackpropInputOp : public OpKernel { ParseSizes(context, strides_, rates_, padding_, &stride_rows, &stride_cols, &rate_rows, &rate_cols, &pad_top, &pad_left, &out_rows, &out_cols); + if (!context->status().ok()) return; // Verify that the incoming gradient tensor has the expected size // [ batch, out_rows, out_cols, depth ] @@ -318,8 +320,10 @@ struct DilationBackpropInput { } } } - in_backprop(b, h_in_max, w_in_max, d) += - out_backprop(b, h_out, w_out, d); + if (h_in_max < input_rows && w_in_max < input_cols) { + in_backprop(b, h_in_max, w_in_max, d) += + out_backprop(b, h_out, w_out, d); + } } } } @@ -349,6 +353,7 @@ class DilationBackpropFilterOp : public OpKernel { ParseSizes(context, strides_, rates_, padding_, &stride_rows, &stride_cols, &rate_rows, &rate_cols, &pad_top, &pad_left, &out_rows, &out_cols); + if (!context->status().ok()) return; // Verify that the incoming gradient tensor has the expected size // [ batch, out_rows, out_cols, depth ] @@ -438,8 +443,10 @@ struct DilationBackpropFilter { } } } - filter_backprop(h_max, w_max, d) += - out_backprop(b, h_out, w_out, d); + if (h_max < filter_rows && w_max < filter_cols) { + filter_backprop(h_max, w_max, d) += + out_backprop(b, h_out, w_out, d); + } } } } diff --git a/tensorflow/core/kernels/edit_distance_op.cc b/tensorflow/core/kernels/edit_distance_op.cc index 4aecdc9e414d36..386a1af08409f6 100644 --- a/tensorflow/core/kernels/edit_distance_op.cc +++ b/tensorflow/core/kernels/edit_distance_op.cc @@ 
-64,6 +64,12 @@ Status ValidateShapes(OpKernelContext* ctx, const Tensor& hypothesis_indices, return errors::InvalidArgument( "truth_shape should be a vector, but got shape: ", truth_shape.shape().DebugString()); + if (hypothesis_values.NumElements() != hypothesis_indices.dim_size(0)) + return errors::InvalidArgument( + "Expected hypothesis_values.NumElements == " + "#rows(hypothesis_indices), their shapes are: ", + hypothesis_values.shape().DebugString(), " and ", + hypothesis_indices.shape().DebugString()); if (hypothesis_shape.NumElements() != hypothesis_indices.dim_size(1)) return errors::InvalidArgument( "Expected hypothesis_shape.NumElements == " @@ -75,6 +81,12 @@ Status ValidateShapes(OpKernelContext* ctx, const Tensor& hypothesis_indices, "Input SparseTensors must have rank at least 2, but truth_shape " "rank is: ", truth_shape.NumElements()); + if (truth_values.NumElements() != truth_indices.dim_size(0)) + return errors::InvalidArgument( + "Expected truth_values.NumElements == " + "#rows(truth_indices), their shapes are: ", + truth_values.shape().DebugString(), " and ", + truth_indices.shape().DebugString()); if (truth_shape.NumElements() != truth_indices.dim_size(1)) return errors::InvalidArgument( "Expected truth_shape.NumElements == " @@ -153,6 +165,11 @@ class EditDistanceOp : public OpKernel { output_shape.AddDim(std::max(hypothesis_st_shape.dim_size(d), truth_st_shape.dim_size(d))); } + const auto output_elements = output_shape.num_elements(); + OP_REQUIRES( + ctx, output_elements > 0, + errors::InvalidArgument("Got output shape ", output_shape.DebugString(), + " which has 0 elements")); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output("output", output_shape, &output)); @@ -185,6 +202,12 @@ class EditDistanceOp : public OpKernel { if (g_truth == g_hypothesis) { auto loc = std::inner_product(g_truth.begin(), g_truth.end(), output_strides.begin(), int64{0}); + OP_REQUIRES( + ctx, loc < output_elements, + errors::Internal("Got an inner product ", loc, + " which would require writing outside of " + "the buffer for the output tensor (max elements ", + output_elements, ")")); output_t(loc) = gtl::LevenshteinDistance(truth_seq, hypothesis_seq, cmp); if (normalize_) output_t(loc) /= truth_seq.size(); @@ -194,6 +217,12 @@ class EditDistanceOp : public OpKernel { } else if (g_truth > g_hypothesis) { // zero-length truth auto loc = std::inner_product(g_hypothesis.begin(), g_hypothesis.end(), output_strides.begin(), int64{0}); + OP_REQUIRES( + ctx, loc < output_elements, + errors::Internal("Got an inner product ", loc, + " which would require writing outside of " + "the buffer for the output tensor (max elements ", + output_elements, ")")); output_t(loc) = hypothesis_seq.size(); if (normalize_ && output_t(loc) != 0.0f) { output_t(loc) = std::numeric_limits<float>::infinity(); } @@ -202,6 +231,12 @@ class EditDistanceOp : public OpKernel { } else { // zero-length hypothesis auto loc = std::inner_product(g_truth.begin(), g_truth.end(), output_strides.begin(), int64{0}); + OP_REQUIRES( + ctx, loc < output_elements, + errors::Internal("Got an inner product ", loc, + " which would require writing outside of " + "the buffer for the output tensor (max elements ", + output_elements, ")")); output_t(loc) = (normalize_) ? 
1.0 : truth_seq.size(); ++truth_iter; } @@ -212,6 +247,12 @@ class EditDistanceOp : public OpKernel { auto hypothesis_seq = hypothesis_j.values(); auto loc = std::inner_product(g_hypothesis.begin(), g_hypothesis.end(), output_strides.begin(), int64{0}); + OP_REQUIRES( + ctx, loc < output_elements, + errors::Internal("Got an inner product ", loc, + " which would require writing outside of the " + "buffer for the output tensor (max elements ", + output_elements, ")")); output_t(loc) = hypothesis_seq.size(); if (normalize_ && output_t(loc) != 0.0f) { output_t(loc) = std::numeric_limits<float>::infinity(); } @@ -224,6 +265,12 @@ class EditDistanceOp : public OpKernel { auto truth_seq = truth_i.values(); auto loc = std::inner_product(g_truth.begin(), g_truth.end(), output_strides.begin(), int64{0}); + OP_REQUIRES( + ctx, loc < output_elements, + errors::Internal("Got an inner product ", loc, + " which would require writing outside of the " + "buffer for the output tensor (max elements ", + output_elements, ")")); output_t(loc) = (normalize_) ? 1.0 : truth_seq.size(); ++truth_iter; } diff --git a/tensorflow/core/kernels/fft_ops.cc b/tensorflow/core/kernels/fft_ops.cc index 050b83980c67c9..fb6531f7bcdba9 100644 --- a/tensorflow/core/kernels/fft_ops.cc +++ b/tensorflow/core/kernels/fft_ops.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/core/platform/errors.h" #define EIGEN_USE_THREADS // See docs in ../ops/fft_ops.cc. @@ -221,6 +222,9 @@ class FFTCPU : public FFTBase { input_slice_sizes[i] = fft_shape[i - 1]; temp_shape.AddDim(fft_shape[i - 1]); } + OP_REQUIRES(ctx, temp_shape.num_elements() > 0, + errors::InvalidArgument("Obtained a FFT shape of 0 elements: ", + temp_shape.DebugString())); auto output = out->flat_inner_dims(); const Eigen::DSizes zero_start_indices; @@ -261,6 +265,9 @@ class FFTCPU : public FFTBase { i == FFTRank ? fft_shape[i - 1] / 2 + 1 : fft_shape[i - 1]; full_fft_shape.AddDim(fft_shape[i - 1]); } + OP_REQUIRES(ctx, full_fft_shape.num_elements() > 0, + errors::InvalidArgument("Obtained a FFT shape of 0 elements: ", + full_fft_shape.DebugString())); Tensor temp; OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<ComplexT>::v(), diff --git a/tensorflow/core/kernels/fractional_avg_pool_op.cc b/tensorflow/core/kernels/fractional_avg_pool_op.cc index dfc2382624e3fa..7c396126427473 100644 --- a/tensorflow/core/kernels/fractional_avg_pool_op.cc +++ b/tensorflow/core/kernels/fractional_avg_pool_op.cc @@ -80,6 +80,10 @@ class FractionalAvgPoolOp : public OpKernel { std::vector<int> output_size(tensor_in_and_out_dims); for (int i = 0; i < tensor_in_and_out_dims; ++i) { input_size[i] = tensor_in.dim_size(i); + OP_REQUIRES( + context, pooling_ratio_[i] <= input_size[i], + errors::InvalidArgument( + "Pooling ratio cannot be bigger than input tensor dim size.")); } // Output size. 
for (int i = 0; i < tensor_in_and_out_dims; ++i) { @@ -246,6 +250,19 @@ class FractionalAvgPoolGradOp : public OpKernel { const int64 out_cols = out_backprop.dim_size(2); const int64 out_depth = out_backprop.dim_size(3); + OP_REQUIRES(context, row_seq_tensor.NumElements() > out_rows, + errors::InvalidArgument("Given out_backprop shape ", + out_backprop.shape().DebugString(), + ", row_seq_tensor must have at least ", + out_rows + 1, " elements, but got ", + row_seq_tensor.NumElements())); + OP_REQUIRES(context, col_seq_tensor.NumElements() > out_cols, + errors::InvalidArgument("Given out_backprop shape ", + out_backprop.shape().DebugString(), + ", col_seq_tensor must have at least ", + out_cols + 1, " elements, but got ", + col_seq_tensor.NumElements())); + auto row_seq_tensor_flat = row_seq_tensor.flat<int64>(); auto col_seq_tensor_flat = col_seq_tensor.flat<int64>(); auto orig_input_tensor_shape_flat = orig_input_tensor_shape.flat<int64>(); @@ -254,6 +271,18 @@ class FractionalAvgPoolGradOp : public OpKernel { const int64 in_batch = orig_input_tensor_shape_flat(0); const int64 in_rows = orig_input_tensor_shape_flat(1); const int64 in_cols = orig_input_tensor_shape_flat(2); const int64 in_depth = orig_input_tensor_shape_flat(3); + OP_REQUIRES( + context, in_batch != 0, + errors::InvalidArgument("Batch dimension of input must not be 0")); + OP_REQUIRES( + context, in_rows != 0, + errors::InvalidArgument("Rows dimension of input must not be 0")); + OP_REQUIRES( + context, in_cols != 0, + errors::InvalidArgument("Columns dimension of input must not be 0")); + OP_REQUIRES( + context, in_depth != 0, + errors::InvalidArgument("Depth dimension of input must not be 0")); constexpr int tensor_in_and_out_dims = 4; // Transform orig_input_tensor_shape into TensorShape diff --git a/tensorflow/core/kernels/fractional_max_pool_op.cc b/tensorflow/core/kernels/fractional_max_pool_op.cc index 619a3507ce415f..1a2a783d135c54 100644 --- a/tensorflow/core/kernels/fractional_max_pool_op.cc +++ b/tensorflow/core/kernels/fractional_max_pool_op.cc @@ -235,6 +235,20 @@ class FractionalMaxPoolGradOp : public OpKernel { // Just to make it similar to FractionalMaxPoolOp. 
constexpr int tensor_in_and_out_dims = 4; + OP_REQUIRES( + context, tensor_in.dims() == tensor_in_and_out_dims, + errors::InvalidArgument("orig_input should be a tensor of rank 4, got ", + tensor_in.DebugString())); + OP_REQUIRES(context, tensor_in.NumElements() > 0, + errors::InvalidArgument("orig_input must not be empty, got ", + tensor_in.DebugString())); + OP_REQUIRES(context, tensor_out.dims() == tensor_in_and_out_dims, + errors::InvalidArgument( + "orig_output should be a tensor of rank 4, got ", + tensor_out.DebugString())); + OP_REQUIRES(context, tensor_out.NumElements() > 0, + errors::InvalidArgument("orig_output must not be empty, got ", + tensor_out.DebugString())); std::vector input_size(tensor_in_and_out_dims); std::vector output_size(tensor_in_and_out_dims); for (int i = 0; i < tensor_in_and_out_dims; ++i) { diff --git a/tensorflow/core/kernels/fused_batch_norm_op.cc b/tensorflow/core/kernels/fused_batch_norm_op.cc index d8e58093b07b25..65846bdeb77213 100644 --- a/tensorflow/core/kernels/fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/fused_batch_norm_op.cc @@ -293,6 +293,9 @@ struct FusedBatchNorm { const CPUDevice& d = context->eigen_device<CPUDevice>(); const int depth = x.dimension(3); + OP_REQUIRES( + context, depth != 0, + errors::Internal("The 4th element in the input shape cannot be 0.")); const int size = x.size(); const int rest_size = size / depth; Eigen::DSizes<Eigen::Index, 2> rest_by_depth(rest_size, depth); @@ -1279,6 +1282,34 @@ class FusedBatchNormOpBase : public OpKernel { errors::InvalidArgument("Error during tensor copy.")); } + const auto num_channels = GetTensorDim(x, tensor_format_, 'C'); + OP_REQUIRES( + context, scale.NumElements() == num_channels, + errors::InvalidArgument("scale must have the same number of elements " + "as the channels of x, got ", + scale.NumElements(), " and ", num_channels)); + OP_REQUIRES( + context, offset.NumElements() == num_channels, + errors::InvalidArgument("offset must have the same number of elements " + "as the channels of x, got ", + offset.NumElements(), " and ", num_channels)); + if (!is_training_ || exponential_avg_factor_ != 1.) { + std::string prefix_msg = is_training_ ? "When exponential_avg_factor != 1" + : "When is_training=false"; + OP_REQUIRES(context, estimated_mean.NumElements() == num_channels, + errors::InvalidArgument( + prefix_msg, + ", mean must have the same number " + "of elements as the channels of x, got ", + estimated_mean.NumElements(), " and ", num_channels)); + OP_REQUIRES(context, estimated_variance.NumElements() == num_channels, + errors::InvalidArgument( + prefix_msg, + ", variance must have the same " + "number of elements as the channels of x, got ", + estimated_variance.NumElements(), " and ", num_channels)); + } + if (has_side_input_) { OP_REQUIRES(context, side_input->shape() == x.shape(), errors::InvalidArgument( @@ -1291,7 +1322,7 @@ class FusedBatchNormOpBase : public OpKernel { // NOTE(ezhulenev): This requirement is coming from implementation // details of cudnnBatchNormalizationForwardTrainingEx. 
OP_REQUIRES( - context, !is_training_ || x.dim_size(3) % 4 == 0, + context, !is_training_ || num_channels % 4 == 0, errors::InvalidArgument("FusedBatchNorm with activation requires " "channel dimension to be a multiple of 4.")); } @@ -1425,6 +1456,11 @@ class FusedBatchNormGradOpBase : public OpKernel { errors::InvalidArgument( "saved variance must be 1-dimensional", saved_maybe_inv_var_or_pop_var.shape().DebugString())); + OP_REQUIRES( + context, x.shape() == y_backprop.shape(), + errors::InvalidArgument( + "x and y_backprop must have same shape, but x has shape ", + x.shape(), " and y_backprop has shape ", y_backprop.shape())); bool use_reshape = (x.dims() == 5); auto x_shape = x.shape(); TensorShape dest_shape; @@ -1442,6 +1478,23 @@ class FusedBatchNormGradOpBase : public OpKernel { errors::InvalidArgument("Error during tensor copy.")); } + const auto num_channels = GetTensorDim(x, tensor_format_, 'C'); + OP_REQUIRES( + context, scale.NumElements() == num_channels, + errors::InvalidArgument("scale must have the same number of elements " + "as the channels of x, got ", + scale.NumElements(), " and ", num_channels)); + OP_REQUIRES( + context, saved_mean_or_pop_mean.NumElements() == num_channels, + errors::InvalidArgument("reserve_space_1 must have the same number of " + "elements as the channels of x, got ", + saved_mean_or_pop_mean.NumElements(), " and ", num_channels)); + OP_REQUIRES( + context, saved_maybe_inv_var_or_pop_var.NumElements() == num_channels, + errors::InvalidArgument("reserve_space_2 must have the same number of " + "elements as the channels of x, got ", + saved_maybe_inv_var_or_pop_var.NumElements(), " and ", num_channels)); + Tensor* x_backprop = nullptr; auto alloc_shape = use_reshape ? dest_shape : x_shape; OP_REQUIRES_OK(context, diff --git a/tensorflow/core/kernels/image/attention_ops.cc b/tensorflow/core/kernels/image/attention_ops.cc index 6e5e07a9fb1b3c..100be63d98e44d 100644 --- a/tensorflow/core/kernels/image/attention_ops.cc +++ b/tensorflow/core/kernels/image/attention_ops.cc @@ -85,11 +85,12 @@ class ExtractGlimpseOp : public OpKernel { "input must be a vector of size 2 (height, width)", window_size.shape().DebugString())); - const int64 output_height = window_size.tensor<int32, 1>()(0); - const int64 output_width = window_size.tensor<int32, 1>()(1); + const int64_t output_height = window_size.tensor<int32, 1>()(0); + const int64_t output_width = window_size.tensor<int32, 1>()(1); + TensorShape output_shape = input_shape; - output_shape.set_dim(1, output_height); - output_shape.set_dim(2, output_width); + OP_REQUIRES_OK(context, output_shape.SetDimWithStatus(1, output_height)); + OP_REQUIRES_OK(context, output_shape.SetDimWithStatus(2, output_width)); const Tensor& offsets = context->input(2); OP_REQUIRES(context, offsets.shape().dims() == 2, diff --git a/tensorflow/core/kernels/image/crop_and_resize_op.cc b/tensorflow/core/kernels/image/crop_and_resize_op.cc index 4efc4ae8846d17..65f972e1730752 100644 --- a/tensorflow/core/kernels/image/crop_and_resize_op.cc +++ b/tensorflow/core/kernels/image/crop_and_resize_op.cc @@ -169,14 +169,15 @@ class CropAndResizeOp : public AsyncOpKernel { context, crop_height > 0 && crop_width > 0, errors::InvalidArgument("crop dimensions must be positive"), done); + TensorShape shape; + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(num_boxes), done); + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(crop_height), done); + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(crop_width), done); + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(depth), done); // Allocate output tensor. 
Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC( - context, - context->allocate_output( - 0, TensorShape({num_boxes, crop_height, crop_width, depth}), - &output), - done); + OP_REQUIRES_OK_ASYNC(context, context->allocate_output(0, shape, &output), + done); auto compute_callback = [this, context, output]() { const Tensor& image = context->input(0); @@ -407,14 +408,15 @@ class CropAndResizeGradImageOp : public AsyncOpKernel { context, grads.dim_size(3) == depth, errors::InvalidArgument("image_size and grads are incompatible"), done); + TensorShape shape; + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(batch_size), done); + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(image_height), done); + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(image_width), done); + OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(depth), done); // Allocate output tensor. Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC( - context, - context->allocate_output( - 0, TensorShape({batch_size, image_height, image_width, depth}), - &output), - done); + OP_REQUIRES_OK_ASYNC(context, context->allocate_output(0, shape, &output), + done); auto compute_callback = [this, context, output]() { const Tensor& grads = context->input(0); diff --git a/tensorflow/core/kernels/image/draw_bounding_box_op.cc b/tensorflow/core/kernels/image/draw_bounding_box_op.cc index 30de99b7d560a2..926ea368a58ba8 100644 --- a/tensorflow/core/kernels/image/draw_bounding_box_op.cc +++ b/tensorflow/core/kernels/image/draw_bounding_box_op.cc @@ -73,6 +73,12 @@ class DrawBoundingBoxesOp : public OpKernel { errors::InvalidArgument("Channel depth should be either 1 (GRY), " "3 (RGB), or 4 (RGBA)")); + OP_REQUIRES( + context, boxes.dim_size(2) == 4, + errors::InvalidArgument( + "The size of the third dimension of the box must be 4. Received: ", + boxes.dim_size(2))); + const int64 batch_size = images.dim_size(0); const int64 height = images.dim_size(1); const int64 width = images.dim_size(2); @@ -147,22 +153,46 @@ class DrawBoundingBoxesOp : public OpKernel { // At this point, {min,max}_box_{row,col}_clamp are inside the // image. 
- CHECK_GE(min_box_row_clamp, 0); - CHECK_GE(max_box_row_clamp, 0); - CHECK_LT(min_box_row_clamp, height); - CHECK_LT(max_box_row_clamp, height); - CHECK_GE(min_box_col_clamp, 0); - CHECK_GE(max_box_col_clamp, 0); - CHECK_LT(min_box_col_clamp, width); - CHECK_LT(max_box_col_clamp, width); + OP_REQUIRES( + context, min_box_row_clamp >= 0, + errors::InvalidArgument("Min box row clamp is less than 0.")); + OP_REQUIRES( + context, max_box_row_clamp >= 0, + errors::InvalidArgument("Max box row clamp is less than 0.")); + OP_REQUIRES(context, min_box_row_clamp <= height, + errors::InvalidArgument( + "Min box row clamp is greater than height.")); + OP_REQUIRES(context, max_box_row_clamp <= height, + errors::InvalidArgument( + "Max box row clamp is greater than height.")); + + OP_REQUIRES( + context, min_box_col_clamp >= 0, + errors::InvalidArgument("Min box col clamp is less than 0.")); + OP_REQUIRES( + context, max_box_col_clamp >= 0, + errors::InvalidArgument("Max box col clamp is less than 0.")); + OP_REQUIRES(context, min_box_col_clamp <= width, + errors::InvalidArgument( + "Min box col clamp is greater than width.")); + OP_REQUIRES(context, max_box_col_clamp <= width, + errors::InvalidArgument( + "Max box col clamp is greater than width.")); // At this point, the min_box_row and min_box_col are either // in the image or above/left of it, and max_box_row and // max_box_col are either in the image or below/right or it. - CHECK_LT(min_box_row, height); - CHECK_GE(max_box_row, 0); - CHECK_LT(min_box_col, width); - CHECK_GE(max_box_col, 0); + + OP_REQUIRES( + context, min_box_row <= height, + errors::InvalidArgument("Min box row is greater than height.")); + OP_REQUIRES(context, max_box_row >= 0, + errors::InvalidArgument("Max box row is less than 0.")); + OP_REQUIRES( + context, min_box_col <= width, + errors::InvalidArgument("Min box col is greater than width.")); + OP_REQUIRES(context, max_box_col >= 0, + errors::InvalidArgument("Max box col is less than 0.")); // Draw top line. 
if (min_box_row >= 0) { diff --git a/tensorflow/core/kernels/image/encode_png_op.cc b/tensorflow/core/kernels/image/encode_png_op.cc index 8dbe1d377df5c6..09bcdbe5e3db0b 100644 --- a/tensorflow/core/kernels/image/encode_png_op.cc +++ b/tensorflow/core/kernels/image/encode_png_op.cc @@ -54,6 +54,8 @@ class EncodePngOp : public OpKernel { OP_REQUIRES(context, image.dims() == 3, errors::InvalidArgument("image must be 3-dimensional", image.shape().DebugString())); + OP_REQUIRES(context, image.NumElements() > 0, + errors::Internal("Invalid image provided.")); OP_REQUIRES( context, FastBoundsCheck(image.NumElements(), std::numeric_limits::max()), diff --git a/tensorflow/core/kernels/image/non_max_suppression_op.cc b/tensorflow/core/kernels/image/non_max_suppression_op.cc index 701753a81d6188..e6f9b188409622 100644 --- a/tensorflow/core/kernels/image/non_max_suppression_op.cc +++ b/tensorflow/core/kernels/image/non_max_suppression_op.cc @@ -161,6 +161,8 @@ void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& scores, bool pad_to_max_output_size = false, int* ptr_num_valid_outputs = nullptr) { const int output_size = max_output_size.scalar()(); + OP_REQUIRES(context, output_size >= 0, + errors::InvalidArgument("output size must be non-negative")); std::vector scores_data(num_boxes); std::copy_n(scores.flat().data(), num_boxes, scores_data.begin()); @@ -759,6 +761,9 @@ class NonMaxSuppressionV4Op : public OpKernel { context, scores, num_boxes, max_output_size, iou_threshold_val, score_threshold_val, dummy_soft_nms_sigma, similarity_fn, return_scores_tensor_, pad_to_max_output_size_, &num_valid_outputs); + if (!context->status().ok()) { + return; + } // Allocate scalar output tensor for number of indices computed. Tensor* num_outputs_t = nullptr; @@ -836,6 +841,9 @@ class NonMaxSuppressionV5Op : public OpKernel { context, scores, num_boxes, max_output_size, iou_threshold_val, score_threshold_val, soft_nms_sigma_val, similarity_fn, return_scores_tensor_, pad_to_max_output_size_, &num_valid_outputs); + if (!context->status().ok()) { + return; + } // Allocate scalar output tensor for number of indices computed. Tensor* num_outputs_t = nullptr; @@ -921,6 +929,8 @@ class CombinedNonMaxSuppressionOp : public OpKernel { errors::InvalidArgument("max_size_per_class must be 0-D, got shape ", max_output_size.shape().DebugString())); const int max_size_per_class = max_output_size.scalar()(); + OP_REQUIRES(context, max_size_per_class > 0, + errors::InvalidArgument("max_size_per_class must be positive")); // max_total_size: scalar const Tensor& max_total_size = context->input(3); OP_REQUIRES( diff --git a/tensorflow/core/kernels/immutable_constant_op.cc b/tensorflow/core/kernels/immutable_constant_op.cc index 1cfbdb82778913..df0d76ce633e9b 100644 --- a/tensorflow/core/kernels/immutable_constant_op.cc +++ b/tensorflow/core/kernels/immutable_constant_op.cc @@ -17,6 +17,8 @@ limitations under the License. 
#include +#include "tensorflow/core/framework/types.pb.h" + namespace tensorflow { namespace { @@ -86,6 +88,9 @@ ImmutableConstantOp::ImmutableConstantOp(OpKernelConstruction* context) OP_REQUIRES_OK(context, context->GetAttr(kMemoryRegionNameAttr, ®ion_name_)); OP_REQUIRES_OK(context, context->GetAttr(kDTypeAttr, &dtype_)); + OP_REQUIRES(context, dtype_ != DT_RESOURCE && dtype_ != DT_VARIANT, + errors::InvalidArgument( + "Resource and variant dtypes are invalid for this op.")); OP_REQUIRES_OK(context, context->GetAttr(kShapeAttr, &shape_)); } @@ -95,6 +100,9 @@ void ImmutableConstantOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK(ctx, allocator->InitializeFromRegion(region_name_, ctx->env())); + OP_REQUIRES(ctx, dtype_ != DT_STRING, + errors::Unimplemented("Sorry, DT_STRING is not currently " + "supported for ImmutableConstOp.")); ctx->set_output(0, Tensor(allocator.get(), dtype_, shape_)); OP_REQUIRES_OK(ctx, allocator->allocation_status()); // Allocator is owned by the tensor from this point. diff --git a/tensorflow/core/kernels/immutable_constant_op_test.cc b/tensorflow/core/kernels/immutable_constant_op_test.cc index d52a8b55a35d79..40ce8918a39ade 100644 --- a/tensorflow/core/kernels/immutable_constant_op_test.cc +++ b/tensorflow/core/kernels/immutable_constant_op_test.cc @@ -146,7 +146,8 @@ TEST(ImmutableConstantOpTest, ExecutionError) { error::INTERNAL); } -Status CreateTempFile(Env* env, float value, uint64 size, string* filename) { +Status CreateTempFileFloat(Env* env, float value, uint64 size, + string* filename) { const string dir = testing::TmpDir(); *filename = io::JoinPath(dir, strings::StrCat("file_", value)); std::unique_ptr file; @@ -166,8 +167,8 @@ TEST(ImmutableConstantOpTest, FromFile) { auto root = Scope::NewRootScope().ExitOnError(); string two_file, three_file; - TF_ASSERT_OK(CreateTempFile(env, 2.0f, 1000, &two_file)); - TF_ASSERT_OK(CreateTempFile(env, 3.0f, 1000, &three_file)); + TF_ASSERT_OK(CreateTempFileFloat(env, 2.0f, 1000, &two_file)); + TF_ASSERT_OK(CreateTempFileFloat(env, 3.0f, 1000, &three_file)); auto node1 = ops::ImmutableConst(root, DT_FLOAT, kFileTensorShape, two_file); auto node2 = ops::ImmutableConst(root, DT_FLOAT, kFileTensorShape, three_file); @@ -190,5 +191,39 @@ TEST(ImmutableConstantOpTest, FromFile) { EXPECT_EQ(outputs.front().flat()(2), 2.0f * 3.0f); } +Status CreateTempFileBadString(Env* env, char value, uint64 size, + const string suffix, string* filename) { + const string dir = testing::TmpDir(); + *filename = io::JoinPath(dir, strings::StrCat("file_", suffix)); + std::unique_ptr file; + TF_RETURN_IF_ERROR(env->NewWritableFile(*filename, &file)); + TF_RETURN_IF_ERROR(file->Append(std::string(size, value))); + TF_RETURN_IF_ERROR(file->Close()); + return Status::OK(); +} + +TEST(ImmutableConstantOpTest, FromFileStringUnimplmented) { + const TensorShape kFileTensorShape({1}); + Env* env = Env::Default(); + auto root = Scope::NewRootScope().ExitOnError(); + + string bad_file; + TF_ASSERT_OK(CreateTempFileBadString(env, '\xe2', 128, "bad_e2", &bad_file)); + auto result = + ops::ImmutableConst(root, DT_STRING, kFileTensorShape, bad_file); + GraphDef graph_def; + TF_ASSERT_OK(root.ToGraphDef(&graph_def)); + SessionOptions session_options; + session_options.env = Env::Default(); + std::unique_ptr session(NewSession(session_options)); + ASSERT_TRUE(session != nullptr) << "Failed to create session"; + TF_ASSERT_OK(session->Create(graph_def)) << "Can't create test graph"; + std::vector outputs; + // Check that the run returned error. 
+ EXPECT_EQ( + session->Run({}, {result.node()->name() + ":0"}, {}, &outputs).code(), + error::UNIMPLEMENTED); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/kernels/inplace_ops.cc b/tensorflow/core/kernels/inplace_ops.cc index 1849cb42883099..e72732e99b24f2 100644 --- a/tensorflow/core/kernels/inplace_ops.cc +++ b/tensorflow/core/kernels/inplace_ops.cc @@ -71,6 +71,15 @@ class ParallelConcatUpdate : public OpKernel { void Compute(OpKernelContext* ctx) override { auto value = ctx->input(0); + // Value should be at least rank 1. Also the 0th dimension should be + // at least loc_. + OP_REQUIRES(ctx, value.dims() >= 1, + errors::InvalidArgument("value should be at least rank 1.")); + OP_REQUIRES( + ctx, value.dim_size(0) > loc_, + errors::InvalidArgument("0th dimension of value = ", value.dim_size(0), + " is less than loc_=", loc_)); + auto update = ctx->input(1); OP_REQUIRES( @@ -225,7 +234,7 @@ class InplaceOpBase : public OpKernel { Tensor y = x; // This creates an alias intentionally. // Skip processing if tensors are empty. - if (x.NumElements() > 0 || v.NumElements() > 0) { + if (x.NumElements() > 0 && v.NumElements() > 0) { OP_REQUIRES_OK(ctx, DoCompute(ctx, i, v, &y)); } ctx->set_output(0, y); diff --git a/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc b/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc index bec28088ad1a6e..b719f55b507b02 100644 --- a/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc +++ b/tensorflow/core/kernels/linalg/banded_triangular_solve_op.cc @@ -217,6 +217,7 @@ class BandedTriangularSolveOpCpu : public OpKernel { const Tensor& in1 = ctx->input(1); ValidateInputTensors(ctx, in0, in1); + if (!ctx->status().ok()) return; MatMulBCast bcast(in0.shape().dim_sizes(), in1.shape().dim_sizes()); OP_REQUIRES( @@ -275,6 +276,14 @@ class BandedTriangularSolveOpCpu : public OpKernel { OP_REQUIRES( ctx, in1.dims() >= 2, errors::InvalidArgument("In[1] ndims must be >= 2: ", in1.dims())); + + OP_REQUIRES(ctx, in0.NumElements() > 0, + errors::InvalidArgument("In[0] must not be an empty tensor: ", + in0.DebugString())); + + OP_REQUIRES(ctx, in1.NumElements() > 0, + errors::InvalidArgument("In[1] must not be an empty tensor: ", + in1.DebugString())); } bool lower_; bool adjoint_; diff --git a/tensorflow/core/kernels/linalg/einsum_op_impl.h b/tensorflow/core/kernels/linalg/einsum_op_impl.h index b9b2d1f0eae459..2f45c238b67f78 100644 --- a/tensorflow/core/kernels/linalg/einsum_op_impl.h +++ b/tensorflow/core/kernels/linalg/einsum_op_impl.h @@ -155,6 +155,7 @@ struct EinsumHelper { input_has_ellipsis->resize(num_inputs); for (int i = 0; i < num_inputs; ++i) { input_label_counts->at(i).resize(num_labels); + input_has_ellipsis->at(i) = false; for (const int label : input_labels->at(i)) { if (label != kEllipsisLabel) input_label_counts->at(i)[label] += 1; @@ -163,6 +164,7 @@ struct EinsumHelper { } } output_label_counts->resize(num_labels); + *output_has_ellipsis = false; for (const int label : *output_labels) { if (label != kEllipsisLabel) output_label_counts->at(label) += 1; diff --git a/tensorflow/core/kernels/linalg/matrix_diag_op.cc b/tensorflow/core/kernels/linalg/matrix_diag_op.cc index 69cc8170793ae3..c797afa455675b 100644 --- a/tensorflow/core/kernels/linalg/matrix_diag_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_diag_op.cc @@ -73,6 +73,9 @@ class MatrixDiagPartOp : public OpKernel { errors::InvalidArgument( "diag_index must be a scalar or vector, received shape: ", diag_index.shape().DebugString())); + 
OP_REQUIRES(context, diag_index.NumElements() > 0, + errors::InvalidArgument( + "Expected diag_index to have at least 1 element")); lower_diag_index = diag_index.flat()(0); upper_diag_index = lower_diag_index; if (TensorShapeUtils::IsVector(diag_index.shape())) { @@ -86,7 +89,10 @@ class MatrixDiagPartOp : public OpKernel { upper_diag_index = diag_index.flat()(1); } } - padding_value = context->input(2).flat()(0); + const Tensor& padding_in = context->input(2); + OP_REQUIRES(context, padding_in.NumElements() == 1, + errors::InvalidArgument("Padding must be scalar.")); + padding_value = padding_in.flat()(0); } const TensorShape& input_shape = input.shape(); @@ -179,6 +185,9 @@ class MatrixDiagOp : public OpKernel { errors::InvalidArgument( "diag_index must be a scalar or vector, received shape: ", diag_index.shape().DebugString())); + OP_REQUIRES(context, diag_index.NumElements() > 0, + errors::InvalidArgument( + "Expected diag_index to have at least 1 element")); lower_diag_index = diag_index.flat()(0); upper_diag_index = lower_diag_index; if (TensorShapeUtils::IsVector(diag_index.shape())) { @@ -192,9 +201,22 @@ class MatrixDiagOp : public OpKernel { upper_diag_index = diag_index.flat()(1); } } - num_rows = context->input(2).flat()(0); - num_cols = context->input(3).flat()(0); - padding_value = context->input(4).flat()(0); + + auto& num_rows_tensor = context->input(2); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(num_rows_tensor.shape()), + errors::InvalidArgument("num_rows must be a scalar")); + num_rows = num_rows_tensor.flat()(0); + + auto& num_cols_tensor = context->input(3); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(num_cols_tensor.shape()), + errors::InvalidArgument("num_cols must be a scalar")); + num_cols = num_cols_tensor.flat()(0); + + auto& padding_value_tensor = context->input(4); + OP_REQUIRES(context, + TensorShapeUtils::IsScalar(padding_value_tensor.shape()), + errors::InvalidArgument("padding_value must be a scalar")); + padding_value = padding_value_tensor.flat()(0); } // Size validations. 
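The `MatrixDiagPart`/`MatrixDiag` changes above are instances of a guard that recurs throughout this patch: validate that an input is scalar (or at least non-empty) before calling `.flat<T>()(0)`, because element 0 of a zero-element tensor is a heap out-of-bounds read. A minimal sketch of the guard, assuming a float input at a hypothetical input index:

```c++
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/platform/errors.h"

// Illustrative only: reads a scalar padding value safely. Without the
// NumElements() check, flat<float>()(0) on an empty tensor reads past
// the end of a zero-byte buffer.
void ReadPaddingValue(tensorflow::OpKernelContext* context, float* out) {
  const tensorflow::Tensor& padding_in = context->input(2);
  OP_REQUIRES(context, padding_in.NumElements() == 1,
              tensorflow::errors::InvalidArgument("Padding must be scalar."));
  *out = padding_in.flat<float>()(0);
}
```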
diff --git a/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc b/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc index df32228d0f21bb..19cbc371f318ae 100644 --- a/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_set_diag_op.cc @@ -70,6 +70,9 @@ class MatrixSetDiagOp : public OpKernel { errors::InvalidArgument( "diag_index must be a scalar or vector, received shape: ", diag_index.shape().DebugString())); + OP_REQUIRES( + context, diag_index.NumElements() > 0, + errors::InvalidArgument("diag_index must have at least one element")); lower_diag_index = diag_index.flat()(0); upper_diag_index = lower_diag_index; if (TensorShapeUtils::IsVector(diag_index.shape())) { diff --git a/tensorflow/core/kernels/linalg/matrix_solve_op.cc b/tensorflow/core/kernels/linalg/matrix_solve_op.cc index 70f02bddf9b785..aeb0203b4a337d 100644 --- a/tensorflow/core/kernels/linalg/matrix_solve_op.cc +++ b/tensorflow/core/kernels/linalg/matrix_solve_op.cc @@ -143,15 +143,22 @@ class MatrixSolveOpGpu : public AsyncOpKernel { done); OP_REQUIRES_ASYNC( context, input.dim_size(ndims - 2) == n, - errors::InvalidArgument("Input matrices must be squares, got", + errors::InvalidArgument("Input matrices must be squares, got ", input.dim_size(ndims - 2), " != ", n), done); OP_REQUIRES_ASYNC(context, rhs.dim_size(ndims - 2) == n, errors::InvalidArgument( "Input matrix and right-hand side must have the " - "same number of rows, got", + "same number of rows, got ", n, " != ", rhs.dim_size(ndims - 2)), done); + for (int dim = 0; dim < ndims - 2; dim++) { + OP_REQUIRES_ASYNC( + context, input.dim_size(dim) == rhs.dim_size(dim), + errors::InvalidArgument( + "All input tensors must have the same outer dimensions."), + done); + } // Allocate output. 
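The `MatrixSolve` hunk above adds a batch-shape agreement loop: in a batched solve, every dimension except the trailing two is a batch dimension, and mismatched batch shapes previously let the kernel index past the smaller operand. A sketch of the check with hypothetical tensors `matrices` and `rhs`:

```c++
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/platform/errors.h"

void CheckBatchDimsMatch(tensorflow::OpKernelContext* context,
                         const tensorflow::Tensor& matrices,
                         const tensorflow::Tensor& rhs) {
  const int ndims = matrices.dims();
  // Trailing two dims are the matrix; everything before is batch shape.
  for (int dim = 0; dim < ndims - 2; ++dim) {
    OP_REQUIRES(context, matrices.dim_size(dim) == rhs.dim_size(dim),
                tensorflow::errors::InvalidArgument(
                    "All input tensors must have the same outer dimensions."));
  }
}
```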
Tensor* output; diff --git a/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h index 99249f792b6ed8..ce5392e62b9fa6 100644 --- a/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h +++ b/tensorflow/core/kernels/linalg/matrix_triangular_solve_op_impl.h @@ -162,6 +162,9 @@ class BaseMatrixTriangularSolveOp : public OpKernel { const Tensor& in1 = ctx->input(1); ValidateInputTensors(ctx, in0, in1); + if (!ctx->status().ok()) { + return; + } MatMulBCast bcast(in0.shape().dim_sizes(), in1.shape().dim_sizes()); OP_REQUIRES( @@ -230,13 +233,22 @@ class MatrixTriangularSolveOp private: void ValidateInputTensors(OpKernelContext* ctx, const Tensor& in0, const Tensor& in1) override { + const auto in0_num_dims = in0.dims(); OP_REQUIRES( - ctx, in0.dims() >= 2, - errors::InvalidArgument("In[0] ndims must be >= 2: ", in0.dims())); + ctx, in0_num_dims >= 2, + errors::InvalidArgument("In[0] ndims must be >= 2: ", in0_num_dims)); + const auto in1_num_dims = in1.dims(); OP_REQUIRES( - ctx, in1.dims() >= 2, - errors::InvalidArgument("In[0] ndims must be >= 2: ", in1.dims())); + ctx, in1_num_dims >= 2, + errors::InvalidArgument("In[1] ndims must be >= 2: ", in1_num_dims)); + + const auto in0_last_dim = in0.dim_size(in0_num_dims - 1); + const auto in0_prev_dim = in0.dim_size(in0_num_dims - 2); + OP_REQUIRES(ctx, in0_last_dim == in0_prev_dim, + errors::InvalidArgument( + "In[0] matrices in the last dimensions must be square (", + in0_last_dim, " =/= ", in0_prev_dim, ")")); } }; diff --git a/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc index a65db40d822abc..1f59d4311c3ab4 100644 --- a/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/tridiagonal_matmul_op_gpu.cu.cc @@ -66,6 +66,12 @@ class TridiagonalMatMulOpGpu : public OpKernel { const Tensor& rhs = context->input(3); const int ndims = rhs.dims(); + OP_REQUIRES( + context, ndims >= 2, + errors::InvalidArgument("Input must have rank >= 2, but got ", ndims)); + OP_REQUIRES_OK(context, ValidateInputTensor(superdiag, "superdiag", rhs)); + OP_REQUIRES_OK(context, ValidateInputTensor(maindiag, "maindiag", rhs)); + OP_REQUIRES_OK(context, ValidateInputTensor(subdiag, "subdiag", rhs)); int64 batch_size = 1; for (int i = 0; i < ndims - 2; i++) { batch_size *= rhs.dim_size(i); @@ -85,6 +91,39 @@ class TridiagonalMatMulOpGpu : public OpKernel { maindiag.flat().data(), subdiag.flat().data(), rhs.flat().data(), output->flat().data())); } + + private: + Status ValidateInputTensor(const Tensor& tensor, + const std::string& tensor_name, + const Tensor& rhs) { + const int ndims = rhs.dims(); + if (tensor.dims() != ndims) { + return errors::InvalidArgument(tensor_name, + " must have same rank as rhs, but got ", + tensor.dims(), " and ", ndims); + } + for (int i = 0; i < ndims - 2; i++) { + if (tensor.dim_size(i) != rhs.dim_size(i)) { + return errors::InvalidArgument( + tensor_name, + " must have same outer dimensions as rhs, but for index ", i, + ", got ", tensor.dim_size(i), " and ", rhs.dim_size(i)); + } + } + if (tensor.dim_size(ndims - 2) != 1) { + return errors::InvalidArgument( + tensor_name, "'s second-to-last dimension must be 1, but got ", + tensor.dim_size(ndims - 2)); + } + if (tensor.dim_size(ndims - 1) != rhs.dim_size(ndims - 2)) { + return errors::InvalidArgument(tensor_name, + "'s last dimension size must be rhs's " + "second-to-last 
dimension size, but got ", + tensor.dim_size(ndims - 1), " and ", + rhs.dim_size(ndims - 2)); + } + return Status::OK(); + } }; REGISTER_LINALG_OP_GPU("TridiagonalMatMul", (TridiagonalMatMulOpGpu), diff --git a/tensorflow/core/kernels/list_kernels.cc b/tensorflow/core/kernels/list_kernels.cc index 9a2f373f5ce0cf..488e02337f707b 100644 --- a/tensorflow/core/kernels/list_kernels.cc +++ b/tensorflow/core/kernels/list_kernels.cc @@ -302,6 +302,10 @@ class TensorListReserve : public OpKernel { PartialTensorShape element_shape; OP_REQUIRES_OK(c, TensorShapeFromTensor(c->input(0), &element_shape)); int32 num_elements = c->input(1).scalar()(); + OP_REQUIRES(c, num_elements >= 0, + errors::InvalidArgument("The num_elements to reserve must be a " + "non negative number, but got ", + num_elements)); TensorList output; output.element_shape = element_shape; output.element_dtype = element_dtype_; diff --git a/tensorflow/core/kernels/list_kernels.h b/tensorflow/core/kernels/list_kernels.h index 1a99eac31c430c..4a5f9e9bece6e2 100644 --- a/tensorflow/core/kernels/list_kernels.h +++ b/tensorflow/core/kernels/list_kernels.h @@ -282,16 +282,16 @@ class TensorListConcat : public OpKernel { explicit TensorListConcat(OpKernelConstruction* c) : OpKernel(c) { OP_REQUIRES_OK(c, c->GetAttr("element_dtype", &element_dtype_)); if (c->HasAttr("element_shape")) { - PartialTensorShape element_shape; - OP_REQUIRES_OK(c, c->GetAttr("element_shape", &element_shape)); - if (!element_shape.unknown_rank()) { - element_shape_except_first_dim_ = PartialTensorShape( - gtl::ArraySlice(element_shape.dim_sizes()).subspan(1)); - } + OP_REQUIRES_OK(c, c->GetAttr("element_shape", &element_shape_)); } } void Compute(OpKernelContext* c) override { + PartialTensorShape element_shape_except_first_dim; + if (!element_shape_.unknown_rank()) { + element_shape_except_first_dim = PartialTensorShape( + gtl::ArraySlice(element_shape_.dim_sizes()).subspan(1)); + } // Check that the input Variant tensor is indeed a TensorList and has the // correct element type. const TensorList* tensor_list = nullptr; @@ -315,21 +315,21 @@ class TensorListConcat : public OpKernel { "Concat requires elements to be at least vectors, ", "found scalars instead.")); // Split `element_shape` into `first_dim` and - // `element_shape_except_first_dim_`. + // `element_shape_except_first_dim`. first_dim = element_shape.dim_size(0); - element_shape_except_first_dim_ = element_shape; - element_shape_except_first_dim_.RemoveDim(0); + element_shape_except_first_dim = element_shape; + element_shape_except_first_dim.RemoveDim(0); } - // If the TensorList is empty, element_shape_except_first_dim_ must be fully + // If the TensorList is empty, element_shape_except_first_dim must be fully // defined. OP_REQUIRES(c, !tensor_list->tensors().empty() || - element_shape_except_first_dim_.IsFullyDefined(), + element_shape_except_first_dim.IsFullyDefined(), errors::InvalidArgument( "All except the first dimension must be fully defined ", "when concating an empty tensor list. element_shape: ", - element_shape_except_first_dim_.DebugString())); - // 1. Check that `element_shape_except_first_dim_` input tensor is + element_shape_except_first_dim.DebugString())); + // 1. Check that `element_shape_except_first_dim` input tensor is // compatible with the shapes of element tensors. // 2. Check that the elements have the same shape except the first dim. // 3. 
If `first_dim` is known, check that it is compatible with the leading @@ -343,7 +343,7 @@ class TensorListConcat : public OpKernel { for (int i = 0; i < tensor_list->tensors().size(); ++i) { const Tensor& t = tensor_list->tensors()[i]; if (t.dtype() != DT_INVALID) { - PartialTensorShape tmp = element_shape_except_first_dim_; + PartialTensorShape tmp = element_shape_except_first_dim; OP_REQUIRES( c, TensorShapeUtils::IsVectorOrHigher(t.shape()), errors::InvalidArgument("Concat saw a scalar shape at index ", i, @@ -351,7 +351,7 @@ class TensorListConcat : public OpKernel { TensorShape shape_except_first_dim = TensorShape( gtl::ArraySlice(t.shape().dim_sizes()).subspan(1)); OP_REQUIRES_OK(c, tmp.MergeWith(shape_except_first_dim, - &element_shape_except_first_dim_)); + &element_shape_except_first_dim)); OP_REQUIRES(c, first_dim == -1 || first_dim == t.shape().dim_size(0), errors::InvalidArgument( "First entry of element_shape input does not match ", @@ -371,12 +371,11 @@ class TensorListConcat : public OpKernel { first_dim = inferred_first_dim; } TensorShape output_shape; - OP_REQUIRES( - c, element_shape_except_first_dim_.AsTensorShape(&output_shape), - errors::InvalidArgument( - "Trying to concat list with only uninitialized tensors ", - "but element_shape_except_first_dim_ is not fully defined: ", - element_shape_except_first_dim_.DebugString())); + OP_REQUIRES(c, element_shape_except_first_dim.AsTensorShape(&output_shape), + errors::InvalidArgument( + "Trying to concat list with only uninitialized tensors ", + "but element_shape_except_first_dim is not fully defined: ", + element_shape_except_first_dim.DebugString())); // Build the lengths_tensor and leading dim of the output tensor by // iterating over all element tensors. Tensor* lengths_tensor = nullptr; @@ -467,7 +466,7 @@ class TensorListConcat : public OpKernel { private: DataType element_dtype_; - PartialTensorShape element_shape_except_first_dim_; + PartialTensorShape element_shape_; }; template diff --git a/tensorflow/core/kernels/load_and_remap_matrix_op.cc b/tensorflow/core/kernels/load_and_remap_matrix_op.cc index cb0245a9b61261..5ec28c70358132 100644 --- a/tensorflow/core/kernels/load_and_remap_matrix_op.cc +++ b/tensorflow/core/kernels/load_and_remap_matrix_op.cc @@ -123,6 +123,11 @@ class LoadAndRemapMatrixOp : public OpKernel { // Processes the checkpoint source and the provided Tensor name. 
const Tensor* ckpt_path_t; OP_REQUIRES_OK(context, context->input("ckpt_path", &ckpt_path_t)); + OP_REQUIRES( + context, ckpt_path_t->NumElements() == 1, + errors::InvalidArgument("The `ckpt_path` tensor must have exactly one " + "element, got tensor of shape ", + ckpt_path_t->shape().DebugString())); const string& ckpt_path = ckpt_path_t->scalar()(); const Tensor* old_tensor_name_t; OP_REQUIRES_OK(context, diff --git a/tensorflow/core/kernels/lookup_table_op.cc b/tensorflow/core/kernels/lookup_table_op.cc index f269aa65b4e911..09d9d32d2ae9ed 100644 --- a/tensorflow/core/kernels/lookup_table_op.cc +++ b/tensorflow/core/kernels/lookup_table_op.cc @@ -56,14 +56,25 @@ class MutableHashTableOfScalars final : public LookupInterface { Status Find(OpKernelContext* ctx, const Tensor& key, Tensor* value, const Tensor& default_value) override { - const V default_val = default_value.flat()(0); const auto key_values = key.flat(); auto value_values = value->flat(); + const auto default_flat = default_value.flat(); + + int64 total = value_values.size(); + int64 default_total = default_flat.size(); + bool is_full_size_default = (total == default_total); tf_shared_lock l(mu_); for (int64 i = 0; i < key_values.size(); ++i) { + // is_full_size_default is true: + // Each key has an independent default value, key_values(i) + // corresponding uses default_flat(i) as its default value. + // + // is_full_size_default is false: + // All keys will share the default_flat(0) as default value. value_values(i) = gtl::FindWithDefault( - table_, SubtleMustCopyIfIntegral(key_values(i)), default_val); + table_, SubtleMustCopyIfIntegral(key_values(i)), + is_full_size_default ? default_flat(i) : default_flat(0)); } return Status::OK(); @@ -173,11 +184,15 @@ class MutableHashTableOfTensors final : public LookupInterface { Status Find(OpKernelContext* ctx, const Tensor& key, Tensor* value, const Tensor& default_value) override { - const auto default_flat = default_value.flat(); + const auto default_flat = default_value.flat_inner_dims(); const auto key_values = key.flat(); auto value_values = value->flat_inner_dims(); int64 value_dim = value_shape_.dim_size(0); + int64 total = value_values.size(); + int64 default_total = default_flat.size(); + bool is_full_size_default = (total == default_total); + tf_shared_lock l(mu_); for (int64 i = 0; i < key_values.size(); ++i) { ValueArray* value_vec = @@ -187,8 +202,15 @@ class MutableHashTableOfTensors final : public LookupInterface { value_values(i, j) = value_vec->at(j); } } else { + // is_full_size_default is true: + // Each key has an independent default value, key_values(i) + // corresponding uses default_flat(i) as its default value. + // + // is_full_size_default is false: + // All keys will share the default_flat(0) as default value. for (int64 j = 0; j < value_dim; j++) { - value_values(i, j) = default_flat(j); + value_values(i, j) = + is_full_size_default ? 
default_flat(i, j) : default_flat(0, j); } } } diff --git a/tensorflow/core/kernels/map_stage_op.cc b/tensorflow/core/kernels/map_stage_op.cc index 89b760ea4d0c37..fd03d2e187e1d8 100644 --- a/tensorflow/core/kernels/map_stage_op.cc +++ b/tensorflow/core/kernels/map_stage_op.cc @@ -210,9 +210,9 @@ class StagingMap : public ResourceBase { const OptionalTuple& tuple) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (tuple[index].has_value()) { - return Status(errors::InvalidArgument( + return errors::InvalidArgument( "The tensor for index '", index, "' for key '", key.scalar()(), - "' was already initialized '", dtypes_.size(), "'.")); + "' was already initialized '", dtypes_.size(), "'."); } return Status::OK(); @@ -220,6 +220,10 @@ class StagingMap : public ResourceBase { // Check that the indices are strictly ordered Status check_index_ordering(const Tensor& indices) { + if (indices.NumElements() == 0) { + return errors::InvalidArgument("Indices are empty"); + } + auto findices = indices.flat(); for (std::size_t i = 0; i < findices.dimension(0) - 1; ++i) { @@ -227,8 +231,7 @@ class StagingMap : public ResourceBase { continue; } - return Status( - errors::InvalidArgument("Indices are not strictly ordered")); + return errors::InvalidArgument("Indices are not strictly ordered"); } return Status::OK(); @@ -238,10 +241,10 @@ class StagingMap : public ResourceBase { Status check_memory_limit(std::size_t bytes) TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) { if (has_memory_limit() && bytes > memory_limit_) { - return Status(errors::ResourceExhausted( + return errors::ResourceExhausted( "Attempted to insert tensors with combined size of '", bytes, "' bytes into Staging Area with a memory limit of '", memory_limit_, - "'.")); + "'."); } return Status::OK(); @@ -527,6 +530,8 @@ class MapStageOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->input("key", &key_tensor)); OP_REQUIRES_OK(ctx, ctx->input("indices", &indices_tensor)); OP_REQUIRES_OK(ctx, ctx->input_list("values", &values_tensor)); + OP_REQUIRES(ctx, key_tensor->NumElements() > 0, + errors::InvalidArgument("key must not be empty")); // Create copy for insertion into Staging Area Tensor key(*key_tensor); diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index cd9680c8212f6c..e5387e4ca56ab4 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -68,6 +68,7 @@ static void SpatialMaxPoolWithArgMaxHelper( "SpatialMaxPoolWithArgMaxHelper requires include_batch_in_index " "to be True when when input_backprop != nullptr")); } + if (tensor_in.NumElements() == 0 || output->NumElements() == 0) return; typedef Eigen::Map> ConstEigenMatrixMap; @@ -192,7 +193,9 @@ static void SpatialMaxPoolWithArgMaxHelper( // CHECK(input_backprop_index >= in_start && input_backprop_index < // in_end) FastBoundsCheck(input_backprop_index - in_start, in_end - in_start); - input_backprop_flat(input_backprop_index) += out_backprop_flat(index); + if (index < out_backprop.NumElements()) { + input_backprop_flat(input_backprop_index) += out_backprop_flat(index); + } } } }; @@ -940,6 +943,10 @@ class MaxPoolingWithArgmaxOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& tensor_in = context->input(0); + OP_REQUIRES(context, tensor_in.dims() == 4, + errors::InvalidArgument("tensor_in must be 4-dimensional (2)")); + OP_REQUIRES(context, tensor_in.NumElements() > 0, + errors::InvalidArgument("tensor_in must not be empty (2)")); PoolParameters params{context, ksize_, @@ -1007,6 
+1014,9 @@ struct LaunchMaxPoolingGradWithArgmax { const int input_start = start * input_size_per_batch; const int input_end = limit * input_size_per_batch; for (int64 index = input_start; index < input_end; index++) { + if (index >= argmax.NumElements()) { + break; + } int64 grad_out_index = argmax_flat(index); if (!include_batch_in_index) { const int64 cur_batch = index / input_size_per_batch; @@ -1077,6 +1087,8 @@ class MaxPoolingGradWithArgmaxOp : public OpKernel { OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( {0}, 0, out_shape, &grad_out)); + if (out_shape.num_elements() == 0) return; // nothing to be done + LaunchMaxPoolingGradWithArgmax::launch( context, params, grad_in, argmax, grad_out, include_batch_in_index_); } diff --git a/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc index f6bc773de4fa0c..9e81189761d28e 100644 --- a/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_requantization_range_per_channel_op.cc @@ -57,6 +57,20 @@ class MklRequantizationRangePerChannelOp : public OpKernel { ctx, input_max.dim_size(0) == depth, errors::InvalidArgument("input_max has incorrect size, expected ", depth, " was ", input_max.dim_size(0))); + OP_REQUIRES( + ctx, input_min.NumElements() == depth, + errors::InvalidArgument("input_min must have the same number of " + "elements as input_max, got ", + input_min.NumElements(), " and ", depth)); + OP_REQUIRES(ctx, input.NumElements() > 0, + errors::InvalidArgument("input must not be empty")); + OP_REQUIRES(ctx, input.dims() == 4, + errors::InvalidArgument("input must be in NHWC format")); + OP_REQUIRES( + ctx, input.dim_size(3) == depth, + errors::InvalidArgument( + "input must have same number of channels as length of input_min: ", + input.dim_size(3), " vs ", depth)); const float* input_min_data = input_min.flat().data(); const float* input_max_data = input_max.flat().data(); diff --git a/tensorflow/core/kernels/mkl/mkl_requantize_per_channel_op.cc b/tensorflow/core/kernels/mkl/mkl_requantize_per_channel_op.cc index 40570a467ea480..996caa0edb0672 100644 --- a/tensorflow/core/kernels/mkl/mkl_requantize_per_channel_op.cc +++ b/tensorflow/core/kernels/mkl/mkl_requantize_per_channel_op.cc @@ -49,35 +49,45 @@ class MklRequantizePerChannelOp : public OpKernel { void Compute(OpKernelContext* ctx) override { try { const Tensor& input = ctx->input(kInputTensorIndex); + OP_REQUIRES( + ctx, input.dims() == 4, + errors::InvalidArgument("Current RequantizePerChannel operator " + "supports 4D tensors only.")); + const Tensor& input_min_vec = ctx->input(kInputMinVecIndex); + size_t depth = input_min_vec.NumElements(); float* input_min_vec_data = (float*)const_cast( static_cast(input_min_vec.flat().data())); + const Tensor& input_max_vec = ctx->input(kInputMaxVecIndex); + OP_REQUIRES( + ctx, input_max_vec.NumElements() == depth, + errors::InvalidArgument("input_max has incorrect size, expected ", + depth, " was ", input_max_vec.NumElements())); float* input_max_vec_data = (float*)const_cast( static_cast(input_max_vec.flat().data())); const Tensor& input_requested_min = ctx->input(this->kRequestMinIndex); + OP_REQUIRES( + ctx, input_requested_min.NumElements() == 1, + errors::InvalidArgument("requested_output_min must be a scalar")); const float input_requested_min_float = input_requested_min.flat()(0); + const Tensor& input_requested_max = ctx->input(this->kRequestMaxIndex); + OP_REQUIRES(
+ ctx, input_requested_max.NumElements() == 1, + errors::InvalidArgument("requested_output_max must be a scalar")); const float input_requested_max_float = input_requested_max.flat()(0); - size_t depth = input_min_vec.NumElements(); - OP_REQUIRES( - ctx, input.dims() == 4, - errors::InvalidArgument("Current RequantizePerChannel operator" - "supports 4D tensors only.")); - OP_REQUIRES( - ctx, input_min_vec.dim_size(0) == depth, - errors::InvalidArgument("input_min has incorrect size, expected ", - depth, " was ", input_min_vec.dim_size(0))); - OP_REQUIRES( - ctx, input_max_vec.dim_size(0) == depth, - errors::InvalidArgument("input_max has incorrect size, expected ", - depth, " was ", input_max_vec.dim_size(0))); - - if (out_type_ == DT_QINT8) DCHECK(input_requested_min_float < 0.0f); + if (out_type_ == DT_QINT8) { + OP_REQUIRES(ctx, input_requested_min_float < 0.0f, + errors::InvalidArgument( + "If out_type is QINT8, requested_output_min must be " + "negative, got ", + input_requested_min_float)); + } const float factor = (out_type_ == DT_QINT8) ? 127.0f : 255.0f; const float requested_min_max = diff --git a/tensorflow/core/kernels/pad_op.cc b/tensorflow/core/kernels/pad_op.cc index 4a1d0cfc3e2374..b59db5a59a7bd3 100644 --- a/tensorflow/core/kernels/pad_op.cc +++ b/tensorflow/core/kernels/pad_op.cc @@ -84,8 +84,10 @@ class PadOp : public OpKernel { OP_REQUIRES(context, before_d >= 0 && after_d >= 0, errors::InvalidArgument("Paddings must be non-negative: ", before_d, " ", after_d)); - const int64 size_d = in0.dim_size(d); - output_shape.AddDim(before_d + size_d + after_d); + const int64_t size_d = in0.dim_size(d); + OP_REQUIRES_OK( + context, output_shape.AddDimWithStatus(before_d + size_d + after_d)); + } // If there is no padding to be done, forward the input to output.
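The `PadOp` hunk just above closes an integer-overflow hole: `before_d + size_d + after_d` is computed from user-controlled paddings, and a wrapped-negative sum fed to `AddDim` would hit a fatal `CHECK`, while `AddDimWithStatus` reports it as `InvalidArgument`. A small sketch of the failure mode it guards against, as a hypothetical helper in a synchronous context:

```c++
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor_shape.h"

// Appends one padded dimension with overflow checking. With, e.g.,
// before_d near INT64_MAX, the sum wraps negative in practice and
// AddDimWithStatus returns an error Status that OP_REQUIRES_OK forwards
// to the context instead of crashing the process.
void AppendPaddedDim(tensorflow::OpKernelContext* context,
                     tensorflow::TensorShape* output_shape, int64_t before_d,
                     int64_t size_d, int64_t after_d) {
  OP_REQUIRES_OK(context, output_shape->AddDimWithStatus(
                              before_d + size_d + after_d));
}
```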
diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc index a63457551ac29b..116df3541d7cf6 100644 --- a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc @@ -627,6 +627,9 @@ class ParameterizedTruncatedNormalOp : public OpKernel { ctx, TensorShapeUtils::IsVector(shape_tensor.shape()), errors::InvalidArgument("Input shape should be a vector, got shape: ", shape_tensor.shape().DebugString())); + OP_REQUIRES(ctx, shape_tensor.NumElements() > 0, + errors::InvalidArgument("Shape tensor must not be empty, got ", + shape_tensor.DebugString())); int32 num_batches = shape_tensor.flat()(0); int32 samples_per_batch = 1; diff --git a/tensorflow/core/kernels/pooling_ops_3d.cc b/tensorflow/core/kernels/pooling_ops_3d.cc index 1114e6931ecf94..2faf90bc072cad 100644 --- a/tensorflow/core/kernels/pooling_ops_3d.cc +++ b/tensorflow/core/kernels/pooling_ops_3d.cc @@ -141,6 +141,11 @@ class Pooling3DOp : public UnaryOp { OP_REQUIRES(context, ksize_.size() == 5, errors::InvalidArgument("Sliding window ksize field must " "specify 5 dimensions")); + bool positive = + std::all_of(ksize_.begin(), ksize_.end(), [](int k) { return k > 0; }); + OP_REQUIRES(context, positive, + errors::InvalidArgument("Sliding window ksize field must " + "have positive dimensions")); OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_)); OP_REQUIRES(context, stride_.size() == 5, errors::InvalidArgument("Sliding window stride field must " @@ -383,6 +388,19 @@ struct LaunchAvgPooling3dGradOp { const std::array& output_shape, const std::array& padding, TensorFormat data_format, Tensor* output) { + OP_REQUIRES( + context, tensor_in_shape.dim_size(0) == out_backprop.dim_size(0), + errors::InvalidArgument( + "Expected first dimension of tensor_in_shape and " + "out_backprop to match, got ", + tensor_in_shape.dim_size(0), " and ", out_backprop.dim_size(0))); + OP_REQUIRES( + context, tensor_in_shape.dim_size(4) == out_backprop.dim_size(4), + errors::InvalidArgument( + "Expected last dimension of tensor_in_shape and " + "out_backprop to match, got ", + tensor_in_shape.dim_size(4), " and ", out_backprop.dim_size(4))); + output->flat().setZero(); std::array input_size = {{tensor_in_shape.dim_size(3), tensor_in_shape.dim_size(2), @@ -693,11 +711,36 @@ class MaxPooling3dGradGradOp : public OpKernel { Pool3dParameters params{context, ksize_, stride_, padding_, data_format_, tensor_in.shape()}; + if (!context->status().ok()) return; // params is invalid Tensor* output = nullptr; OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( {2}, 0, tensor_out.shape(), &output)); + // Given access patterns in LaunchMaxPooling3dGradGradOp, these tensors must + // have elements.
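The comment above introduces the empty-tensor guards that follow: the grad-grad launcher indexes `tensor_in`, `tensor_out`, and `out_grad_backprop` directly, and a zero-element tensor still produces a plausible-looking `flat()` view whose element 0 does not exist. A generic sketch of the guard, with an illustrative helper name:

```c++
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/platform/errors.h"

// Rejects empty inputs before any element of the tensor is dereferenced.
void RequireNonEmpty(tensorflow::OpKernelContext* context,
                     const tensorflow::Tensor& tensor_in) {
  OP_REQUIRES(context, tensor_in.NumElements() > 0,
              tensorflow::errors::InvalidArgument(
                  "received empty tensor tensor_in: ",
                  tensor_in.DebugString()));
}
```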
+ OP_REQUIRES(context, tensor_in.NumElements() > 0, + errors::InvalidArgument("received empty tensor tensor_in: ", + tensor_in.DebugString())); + OP_REQUIRES(context, tensor_out.NumElements() > 0, + errors::InvalidArgument("received empty tensor tensor_out: ", + tensor_out.DebugString())); + OP_REQUIRES( + context, out_grad_backprop.NumElements() > 0, + errors::InvalidArgument("received empty tensor out_grad_backprop: ", + out_grad_backprop.DebugString())); + OP_REQUIRES(context, + tensor_in.NumElements() == out_grad_backprop.NumElements(), + errors::InvalidArgument("tensor_in and out_grad_backprop must " + "have same number of elements, got <", + tensor_in.DebugString(), "> and <", + out_grad_backprop.DebugString(), ">")); + OP_REQUIRES( + context, tensor_out.NumElements() == output->NumElements(), + errors::InvalidArgument( + "tensor_out and output must have same number of elements, got <", + tensor_out.DebugString(), "> and <", output->DebugString(), ">")); + LaunchMaxPooling3dGradGradOp::launch( context, params, tensor_in, tensor_out, out_grad_backprop, output); } diff --git a/tensorflow/core/kernels/pooling_ops_common.cc b/tensorflow/core/kernels/pooling_ops_common.cc index a0c07f31b3d872..6d0a176625f612 100644 --- a/tensorflow/core/kernels/pooling_ops_common.cc +++ b/tensorflow/core/kernels/pooling_ops_common.cc @@ -170,6 +170,8 @@ PoolParameters::PoolParameters(OpKernelContext* context, pad_depth = 0; out_depth = depth; } else { + OP_REQUIRES(context, depth_window > 0, + errors::InvalidArgument("depth_window must not be 0")); // Our current version of depthwise max pooling does not support // any padding, and expects the depth_window to equal the // depth_stride (no overlapping). diff --git a/tensorflow/core/kernels/pooling_ops_common.h b/tensorflow/core/kernels/pooling_ops_common.h index dacbb872cf04a7..642fb4b413a0ef 100644 --- a/tensorflow/core/kernels/pooling_ops_common.h +++ b/tensorflow/core/kernels/pooling_ops_common.h @@ -194,6 +194,9 @@ class MaxPoolingOp : public OpKernel { void SpatialMaxPool(OpKernelContext* context, Tensor* output, const Tensor& tensor_in, const PoolParameters& params, const Padding& padding) { + if (output->NumElements() == 0) { + return; + } // On GPU, use Eigen's Spatial Max Pooling. On CPU, use an // EigenMatrix version that is currently faster than Eigen's // Spatial MaxPooling implementation. @@ -447,6 +450,9 @@ class MaxPoolingV2Op : public OpKernel { void SpatialMaxPool(OpKernelContext* context, Tensor* output, const Tensor& tensor_in, const PoolParameters& params, const Padding& padding) { + if (output->NumElements() == 0) { + return; + } // On GPU, use Eigen's Spatial Max Pooling. On CPU, use an // EigenMatrix version that is currently faster than Eigen's // Spatial MaxPooling implementation. @@ -564,6 +570,9 @@ template void SpatialAvgPool(OpKernelContext* context, Tensor* output, const Tensor& input, const PoolParameters& params, const Padding& padding) { + if (output->NumElements() == 0) { + return; + } typedef Eigen::Map> ConstEigenMatrixMap; typedef Eigen::Map> diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.cc b/tensorflow/core/kernels/quantize_and_dequantize_op.cc index 675bdaec225bd7..d63a49a04be621 100644 --- a/tensorflow/core/kernels/quantize_and_dequantize_op.cc +++ b/tensorflow/core/kernels/quantize_and_dequantize_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/core/framework/op_requires.h" #define EIGEN_USE_THREADS #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \ @@ -71,6 +72,9 @@ class QuantizeAndDequantizeV2Op : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); + OP_REQUIRES( + ctx, axis_ >= -1, + errors::InvalidArgument("Axis must be at least -1. Found ", axis_)); OP_REQUIRES( ctx, (axis_ == -1 || axis_ < input.shape().dims()), errors::InvalidArgument("Shape must be at least rank ", axis_ + 1, @@ -154,13 +158,30 @@ class QuantizeAndDequantizeV4GradientOp : public OpKernel { Tensor* input_backprop = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &input_backprop)); + OP_REQUIRES( + ctx, axis_ >= -1, + errors::InvalidArgument("Axis must be at least -1. Found ", axis_)); + OP_REQUIRES(ctx, (axis_ == -1 || axis_ < input.shape().dims()), + errors::InvalidArgument( + "Axis should be -1 or 0 or a positive value less than ", + input.shape().dims(), " but given axis value was ", axis_)); OP_REQUIRES( ctx, input.IsSameSize(gradient), errors::InvalidArgument("gradient and input must be the same size")); const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_); const Tensor& input_min_tensor = ctx->input(2); + OP_REQUIRES(ctx, + input_min_tensor.dims() == 0 || input_min_tensor.dims() == 1, + errors::InvalidArgument( + "Input min tensor must have dimension 0 or 1. Received ", + input_min_tensor.dims(), ".")); const Tensor& input_max_tensor = ctx->input(3); + OP_REQUIRES(ctx, + input_max_tensor.dims() == 0 || input_max_tensor.dims() == 1, + errors::InvalidArgument( + "Input max tensor must have dimension 0 or 1. Received ", + input_max_tensor.dims(), ".")); if (axis_ != -1) { OP_REQUIRES( ctx, input_min_tensor.dim_size(0) == depth, @@ -224,6 +245,10 @@ class QuantizeAndDequantizeV3Op : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); + OP_REQUIRES(ctx, axis_ < input.dims(), + errors::InvalidArgument( + "Axis requested is larger than input dimensions. Axis: ", + axis_, " Input Dimensions: ", input.dims())); const int depth = (axis_ == -1) ?
1 : input.dim_size(axis_); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); diff --git a/tensorflow/core/kernels/quantize_op.cc b/tensorflow/core/kernels/quantize_op.cc index a523c4b9cd0249..098991e4f436d8 100644 --- a/tensorflow/core/kernels/quantize_op.cc +++ b/tensorflow/core/kernels/quantize_op.cc @@ -113,7 +113,50 @@ class QuantizeV2Op : public OpKernel { int num_slices = 1; if (axis_ > -1) { + OP_REQUIRES( + ctx, input.dims() > axis_, + errors::InvalidArgument( + "Axis is on a zero-based index, so its value must always be less " + "than number of input's dims, but given axis value was ", + axis_, " and input's dims was ", input.dims())); num_slices = input.dim_size(axis_); + OP_REQUIRES(ctx, input_min_range.dims() == 1, + errors::InvalidArgument( + "If axis is specified, min_range must be a 1-D tensor " + "whose size matches the axis dimension of the input and " + "output tensors, but min_range dims are ", + input_min_range.dims())); + OP_REQUIRES(ctx, input_min_range.dim_size(0) == num_slices, + errors::InvalidArgument( + "If axis is specified, min_range must be a 1-D tensor " + "whose size matches the axis dimension of the input and " + "output tensors, but min_range is a 1-D tensor of size ", + input_min_range.dim_size(0), + " and input's axis dimension is of size ", num_slices)); + OP_REQUIRES(ctx, input_max_range.dims() == 1, + errors::InvalidArgument( + "If axis is specified, max_range must be a 1-D tensor " + "whose size matches the axis dimension of the input and " + "output tensors, but max_range dims are ", + input_max_range.dims())); + OP_REQUIRES(ctx, input_max_range.dim_size(0) == num_slices, + errors::InvalidArgument( + "If axis is specified, max_range must be a 1-D tensor " + "whose size matches the axis dimension of the input and " + "output tensors, but max_range is a 1-D tensor of size ", + input_max_range.dim_size(0), + " and input's axis dimension is of size ", num_slices)); + } else { + OP_REQUIRES(ctx, input_min_range.NumElements() == 1, + errors::InvalidArgument( + "If axis is not specified, min_range must contain a " + "single float element, but it contains ", + input_min_range.NumElements(), " elements")); + OP_REQUIRES(ctx, input_max_range.NumElements() == 1, + errors::InvalidArgument( + "If axis is not specified, max_range must contain a " + "single float element, but it contains ", + input_max_range.NumElements(), " elements")); } const TensorShape& minmax_shape = ctx->input(1).shape(); diff --git a/tensorflow/core/kernels/quantized_add_op.cc b/tensorflow/core/kernels/quantized_add_op.cc index 55c69de7d3ea6c..b186f00f15c061 100644 --- a/tensorflow/core/kernels/quantized_add_op.cc +++ b/tensorflow/core/kernels/quantized_add_op.cc @@ -538,6 +538,8 @@ class QuantizedAddOp : public OpKernel { tensor_min = min_x; tensor_max = max_x; } + OP_REQUIRES(context, vector_num_elements > 0, + errors::InvalidArgument("Must have some elements to add")); VectorTensorAddition( vector_data, vector_min, vector_max, vector_num_elements, tensor_data, tensor_min, tensor_max, tensor_num_elements, min_z_value, max_z_value, diff --git a/tensorflow/core/kernels/quantized_batch_norm_op.cc b/tensorflow/core/kernels/quantized_batch_norm_op.cc index b03da7ad17fab4..6dfe07f97a4007 100644 --- a/tensorflow/core/kernels/quantized_batch_norm_op.cc +++ b/tensorflow/core/kernels/quantized_batch_norm_op.cc @@ -173,20 +173,50 @@ class QuantizedBatchNormOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = 
context->input(0); - const float input_min = context->input(1).flat()(0); - const float input_max = context->input(2).flat()(0); + const auto& input_min_tensor = context->input(1); + OP_REQUIRES(context, input_min_tensor.NumElements() == 1, + errors::InvalidArgument("input_min must have 1 element")); + const float input_min = input_min_tensor.flat()(0); + const auto& input_max_tensor = context->input(2); + OP_REQUIRES(context, input_max_tensor.NumElements() == 1, + errors::InvalidArgument("input_max must have 1 element")); + const float input_max = input_max_tensor.flat()(0); const Tensor& mean = context->input(3); - const float mean_min = context->input(4).flat()(0); - const float mean_max = context->input(5).flat()(0); + const auto& mean_min_tensor = context->input(4); + OP_REQUIRES(context, mean_min_tensor.NumElements() == 1, + errors::InvalidArgument("mean_min must have 1 element")); + const float mean_min = mean_min_tensor.flat()(0); + const auto& mean_max_tensor = context->input(5); + OP_REQUIRES(context, mean_max_tensor.NumElements() == 1, + errors::InvalidArgument("mean_max must have 1 element")); + const float mean_max = mean_max_tensor.flat()(0); const Tensor& var = context->input(6); - const float var_min = context->input(7).flat()(0); - const float var_max = context->input(8).flat()(0); + const auto& var_min_tensor = context->input(7); + OP_REQUIRES(context, var_min_tensor.NumElements() == 1, + errors::InvalidArgument("var_min must have 1 element")); + const float var_min = var_min_tensor.flat()(0); + const auto& var_max_tensor = context->input(8); + OP_REQUIRES(context, var_max_tensor.NumElements() == 1, + errors::InvalidArgument("var_max must have 1 element")); + const float var_max = var_max_tensor.flat()(0); const Tensor& beta = context->input(9); - const float beta_min = context->input(10).flat()(0); - const float beta_max = context->input(11).flat()(0); + const auto& beta_min_tensor = context->input(10); + OP_REQUIRES(context, beta_min_tensor.NumElements() == 1, + errors::InvalidArgument("beta_min must have 1 element")); + const float beta_min = beta_min_tensor.flat()(0); + const auto& beta_max_tensor = context->input(11); + OP_REQUIRES(context, beta_max_tensor.NumElements() == 1, + errors::InvalidArgument("beta_max must have 1 element")); + const float beta_max = beta_max_tensor.flat()(0); const Tensor& gamma = context->input(12); - const float gamma_min = context->input(13).flat()(0); - const float gamma_max = context->input(14).flat()(0); + const auto& gamma_min_tensor = context->input(13); + OP_REQUIRES(context, gamma_min_tensor.NumElements() == 1, + errors::InvalidArgument("gamma_min must have 1 element")); + const float gamma_min = gamma_min_tensor.flat()(0); + const auto& gamma_max_tensor = context->input(14); + OP_REQUIRES(context, gamma_max_tensor.NumElements() == 1, + errors::InvalidArgument("gamma_max must have 1 element")); + const float gamma_max = gamma_max_tensor.flat()(0); OP_REQUIRES(context, input.dims() == 4, errors::InvalidArgument("input must be 4-dimensional", input.shape().DebugString())); @@ -203,6 +233,33 @@ class QuantizedBatchNormOp : public OpKernel { OP_REQUIRES(context, gamma.dims() == 1, errors::InvalidArgument("gamma must be 1-dimensional", gamma.shape().DebugString())); + OP_REQUIRES(context, mean.NumElements() > 1, + errors::InvalidArgument("Must have at least a mean value")); + const auto last_dim = input.shape().dims() - 1; +
OP_REQUIRES(context, + mean.shape().dim_size(0) == input.shape().dim_size(last_dim), + errors::InvalidArgument("Must provide as many means as the " + "last dimension of the input tensor: ", + mean.shape().DebugString(), " vs. ", + input.shape().DebugString())); + OP_REQUIRES( + context, mean.shape().dim_size(0) == var.shape().dim_size(0), + errors::InvalidArgument( + "Mean and variance tensors must have the same shape: ", + mean.shape().DebugString(), " vs. ", var.shape().DebugString())); + OP_REQUIRES( + context, mean.shape().dim_size(0) == beta.shape().dim_size(0), + errors::InvalidArgument( + "Mean and beta tensors must have the same shape: ", + mean.shape().DebugString(), " vs. ", beta.shape().DebugString())); + OP_REQUIRES( + context, mean.shape().dim_size(0) == gamma.shape().dim_size(0), + errors::InvalidArgument( + "Mean and gamma tensors must have the same shape: ", + mean.shape().DebugString(), " vs. ", gamma.shape().DebugString())); Tensor* output = nullptr; OP_REQUIRES_OK(context, diff --git a/tensorflow/core/kernels/quantized_bias_add_op.cc b/tensorflow/core/kernels/quantized_bias_add_op.cc index 5457d290c2559c..db0e21a498011d 100644 --- a/tensorflow/core/kernels/quantized_bias_add_op.cc +++ b/tensorflow/core/kernels/quantized_bias_add_op.cc @@ -56,6 +56,8 @@ class QuantizedBiasAddOp : public OpKernel { "Must provide as many biases as the last dimension " "of the input tensor: ", bias.shape().DebugString(), " vs. ", input.shape().DebugString())); + OP_REQUIRES(context, bias.NumElements() > 0, + errors::InvalidArgument("Must provide at least 1 bias")); Tensor* output = nullptr; OP_REQUIRES_OK(context, diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc index a4d36cca3e4088..a339de8cfc8fa3 100644 --- a/tensorflow/core/kernels/quantized_conv_ops.cc +++ b/tensorflow/core/kernels/quantized_conv_ops.cc @@ -18,6 +18,8 @@ limitations under the License. #include #include +#include "tensorflow/core/platform/errors.h" + #define EIGEN_USE_THREADS #define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK @@ -227,8 +229,12 @@ class Im2ColConvFunctor { return; } - CHECK_GT(output_width, 0); - CHECK_GT(output_height, 0); + OP_REQUIRES( + context, output_width > 0, + errors::InvalidArgument("output_width must be strictly positive")); + OP_REQUIRES( + context, output_height > 0, + errors::InvalidArgument("output_height must be strictly positive")); int filter_left_offset; int filter_top_offset; if (padding == VALID) { @@ -255,6 +261,9 @@ class Im2ColConvFunctor { // by the width, then the height. This is the standard memory order in the // image world if it helps to visualize it. 
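The guard added just below exists because `filter_value_count` is a product of user-controlled filter dimensions and immediately becomes a divisor: with a zero-size filter, the chunk-size division faults (SIGFPE). A sketch with hypothetical names and `float` standing in for the kernel's templated element type:

```c++
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/platform/errors.h"

void ComputePatchesPerChunk(tensorflow::OpKernelContext* context,
                            int64_t filter_width, int64_t filter_height,
                            int64_t input_depth, int64_t max_chunk_size,
                            int64_t* patches_per_chunk) {
  const int64_t filter_value_count =
      filter_width * filter_height * input_depth;
  OP_REQUIRES(context, filter_value_count > 0,
              tensorflow::errors::InvalidArgument(
                  "filter patch must contain at least one element"));
  // Safe now: the divisor is strictly positive.
  *patches_per_chunk = max_chunk_size / (filter_value_count *
                                         static_cast<int64_t>(sizeof(float)));
}
```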
const int filter_value_count = filter_width * filter_height * input_depth; + OP_REQUIRES(context, filter_value_count > 0, + errors::InvalidArgument( + "filter patch must contain at least one element")); const int64 patches_per_chunk = kMaxChunkSize / (filter_value_count * sizeof(T1)); const int64 chunk_value_count = diff --git a/tensorflow/core/kernels/quantized_mul_op.cc b/tensorflow/core/kernels/quantized_mul_op.cc index 4e191f162662bb..22cff8939449a6 100644 --- a/tensorflow/core/kernels/quantized_mul_op.cc +++ b/tensorflow/core/kernels/quantized_mul_op.cc @@ -284,10 +284,22 @@ class QuantizedMulOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& x = context->input(0); const Tensor& y = context->input(1); - const float min_x = context->input(2).flat()(0); - const float max_x = context->input(3).flat()(0); - const float min_y = context->input(4).flat()(0); - const float max_y = context->input(5).flat()(0); + auto& min_x_tensor = context->input(2); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_x_tensor.shape()), + errors::InvalidArgument("min_x must be a scalar")); + const float min_x = min_x_tensor.flat()(0); + auto& max_x_tensor = context->input(3); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_x_tensor.shape()), + errors::InvalidArgument("max_x must be a scalar")); + const float max_x = max_x_tensor.flat()(0); + auto& min_y_tensor = context->input(4); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_y_tensor.shape()), + errors::InvalidArgument("min_y must be a scalar")); + const float min_y = min_y_tensor.flat()(0); + auto& max_y_tensor = context->input(5); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_y_tensor.shape()), + errors::InvalidArgument("max_y must be a scalar")); + const float max_y = max_y_tensor.flat()(0); BCast bcast(BCast::FromShape(x.shape()), BCast::FromShape(y.shape())); if (!bcast.IsValid()) { @@ -347,6 +359,11 @@ class QuantizedMulOp : public OpKernel { tensor_num_elements = x.NumElements(); tensor_offset = offset_x; } + if (vector_num_elements == 0) { + context->SetStatus( + errors::InvalidArgument("vector must have at least 1 element")); + return; + } VectorTensorMultiply( vector_data, vector_offset, vector_num_elements, tensor_data, tensor_offset, tensor_num_elements, z_data); diff --git a/tensorflow/core/kernels/quantized_reshape_op.cc b/tensorflow/core/kernels/quantized_reshape_op.cc index bd76c94edeea7a..682f4aaa1f79e7 100644 --- a/tensorflow/core/kernels/quantized_reshape_op.cc +++ b/tensorflow/core/kernels/quantized_reshape_op.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/reshape_op.h" @@ -30,9 +31,29 @@ class QuantizedReshapeOp : public ReshapeOp { void Compute(OpKernelContext* ctx) override { // This call processes inputs 1 and 2 to write output 0. 
ReshapeOp::Compute(ctx); + if (!ctx->status().ok()) { + return; + } + + const auto& input_min_float_tensor = ctx->input(2); + const auto& input_min_float_shape = input_min_float_tensor.shape(); + OP_REQUIRES(ctx, + TensorShapeUtils::IsScalar(input_min_float_shape) || + (TensorShapeUtils::IsVector(input_min_float_shape) && + (input_min_float_shape.dim_size(0) == 1)), + errors::InvalidArgument( + "input_min must be a scalar or a vector of 1 element")); + const float input_min_float = input_min_float_tensor.flat()(0); + const auto& input_max_float_tensor = ctx->input(3); + const auto& input_max_float_shape = input_max_float_tensor.shape(); + OP_REQUIRES(ctx, + TensorShapeUtils::IsScalar(input_max_float_shape) || + (TensorShapeUtils::IsVector(input_max_float_shape) && + (input_max_float_shape.dim_size(0) == 1)), + errors::InvalidArgument( + "input_max must be a scalar or a vector of 1 element")); + const float input_max_float = input_max_float_tensor.flat()(0); - const float input_min_float = ctx->input(2).flat()(0); - const float input_max_float = ctx->input(3).flat()(0); Tensor* output_min = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(1, TensorShape({}), &output_min)); output_min->flat()(0) = input_min_float; diff --git a/tensorflow/core/kernels/quantized_resize_bilinear_op.cc b/tensorflow/core/kernels/quantized_resize_bilinear_op.cc index da0a35a6554b25..5970c0e9480c92 100644 --- a/tensorflow/core/kernels/quantized_resize_bilinear_op.cc +++ b/tensorflow/core/kernels/quantized_resize_bilinear_op.cc @@ -64,6 +64,8 @@ inline void ComputeInterpolationWeights( std::max(static_cast(in_f), static_cast(0)); interpolation->upper[i] = std::min(static_cast(std::ceil(in)), in_size - 1); + interpolation->lower[i] = + std::min(interpolation->lower[i], interpolation->upper[i]); interpolation->lerp[i] = in - in_f; interpolation->ilerp[i] = static_cast((in - in_f) * (1 << resolution)); @@ -701,8 +703,14 @@ class QuantizedResizeBilinearOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); - const float in_min = context->input(2).flat()(0); - const float in_max = context->input(3).flat()(0); + const auto& in_min_tensor = context->input(2); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(in_min_tensor.shape()), + errors::InvalidArgument("min must be a scalar")); + const float in_min = in_min_tensor.flat()(0); + const auto& in_max_tensor = context->input(3); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(in_max_tensor.shape()), + errors::InvalidArgument("max must be a scalar")); + const float in_max = in_max_tensor.flat()(0); ImageResizerState st(align_corners_, false); st.ValidateAndCreateOutput(context, input); diff --git a/tensorflow/core/kernels/ragged_cross_op.cc b/tensorflow/core/kernels/ragged_cross_op.cc index ea65c0ee2b5b21..5dfe93f4166592 100644 --- a/tensorflow/core/kernels/ragged_cross_op.cc +++ b/tensorflow/core/kernels/ragged_cross_op.cc @@ -21,6 +21,7 @@ limitations under the License. 
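The ComputeInterpolationWeights hunk above adds a clamp so the lower interpolation bound can never exceed the upper one. A self-contained sketch of the same arithmetic (scale-factor handling elided; behavior inferred from the hunk):

#include <algorithm>
#include <cmath>
#include <cstdint>

// For an out-of-range coordinate, floor(in) can exceed
// min(ceil(in), in_size - 1); without the final clamp the kernel would read
// past the end of the input row.
void ComputeBounds(float in, int64_t in_size, int64_t* lower, int64_t* upper) {
  const float in_f = std::floor(in);
  *lower = std::max(static_cast<int64_t>(in_f), int64_t{0});
  *upper = std::min(static_cast<int64_t>(std::ceil(in)), in_size - 1);
  *lower = std::min(*lower, *upper);  // the clamp added by the patch
}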
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/util/util.h" #include "tensorflow/core/util/work_sharder.h" @@ -466,16 +467,45 @@ class RaggedCrossOp : public OpKernel { int next_dense = 0; for (char c : input_order_) { if (c == 'R') { + if (next_ragged >= ragged_values_list.size()) + return errors::InvalidArgument( + "input_order \"", input_order_, + "\" specifies reading a ragged tensor value at index ", + next_ragged, " from a list of ", ragged_values_list.size(), + " values."); + if (next_ragged >= ragged_splits_list.size()) + return errors::InvalidArgument( + "input_order \"", input_order_, + "\" specifies reading a ragged tensor split at index ", + next_ragged, " from a list of ", ragged_splits_list.size(), + " splits."); TF_RETURN_IF_ERROR(BuildRaggedFeatureReader( ragged_values_list[next_ragged], ragged_splits_list[next_ragged], features)); next_ragged++; } else if (c == 'S') { + if (next_sparse >= sparse_values_list.size()) + return errors::InvalidArgument( + "input_order \"", input_order_, + "\" specifies reading a sparse tensor value at index ", + next_sparse, " from a list of ", sparse_values_list.size(), + " values."); + if (next_sparse >= sparse_indices_list.size()) + return errors::InvalidArgument( + "input_order \"", input_order_, + "\" specifies reading a sparse tensor index at index ", + next_sparse, " from a list of ", sparse_indices_list.size(), + " indices."); TF_RETURN_IF_ERROR(BuildSparseFeatureReader( sparse_indices_list[next_sparse], sparse_values_list[next_sparse], batch_size, features)); next_sparse++; } else if (c == 'D') { + if (next_dense >= dense_list.size()) + return errors::InvalidArgument( + "input_order \"", input_order_, + "\" specifies reading a dense tensor at index ", next_dense, + " from a list of ", dense_list.size(), " tensors."); TF_RETURN_IF_ERROR( BuildDenseFeatureReader(dense_list[next_dense++], features)); } else { diff --git a/tensorflow/core/kernels/ragged_gather_op.cc b/tensorflow/core/kernels/ragged_gather_op.cc index 3bf82cba050e3b..d6d51c770bbb7a 100644 --- a/tensorflow/core/kernels/ragged_gather_op.cc +++ b/tensorflow/core/kernels/ragged_gather_op.cc @@ -58,15 +58,21 @@ class RaggedGatherOpBase : public OpKernel { void Compute(OpKernelContext* context) override { // Get the input Tensors. + OpInputList params_nested_splits_in; OP_REQUIRES_OK(context, context->input_list("params_nested_splits", ¶ms_nested_splits_in)); + OP_REQUIRES( + context, params_nested_splits_in.size() > 0, + errors::InvalidArgument("params_nested_splits must be non empty")); + const Tensor& params_dense_values_in = context->input(params_nested_splits_in.size()); const Tensor& indices_in = context->input(params_nested_splits_in.size() + 1); - DCHECK_GT(params_nested_splits_in.size(), 0); // Enforced by REGISTER_OP. 
+ OP_REQUIRES(context, params_nested_splits_in[0].dims() > 0, + errors::InvalidArgument("Split tensors must not be scalars")); SPLITS_TYPE num_params = params_nested_splits_in[0].dim_size(0) - 1; OP_REQUIRES_OK(context, ValidateIndices(indices_in, num_params)); diff --git a/tensorflow/core/kernels/ragged_tensor_from_variant_op.cc b/tensorflow/core/kernels/ragged_tensor_from_variant_op.cc index d9993bb6d3907a..c481d90638e4e2 100644 --- a/tensorflow/core/kernels/ragged_tensor_from_variant_op.cc +++ b/tensorflow/core/kernels/ragged_tensor_from_variant_op.cc @@ -174,7 +174,23 @@ Status NestedStackRaggedTensors( auto output_values_flat = output_ragged->mutable_values()->flat_outer_dims(); int values_index = 0; + + TensorShape expected_value_shape = component_values_shape; + expected_value_shape.RemoveDim(0); + for (int i = 0; i < ragged_components.size(); i++) { + // Check that the flat_values tensor shape is compatible. + TensorShape value_shape = ragged_components[i].values().shape(); + value_shape.RemoveDim(0); + if (value_shape != expected_value_shape) { + return errors::InvalidArgument( + "All flat_values must have compatible shapes. Shape at index 0: ", + expected_value_shape, ". Shape at index ", i, ": ", value_shape, + ". If you are using tf.map_fn, then you may need to specify an " + "explicit fn_output_signature with appropriate ragged_rank, and/or " + "convert output tensors to RaggedTensors."); + } + auto component_values_flat = ragged_components[i].values().flat_outer_dims(); int num_inner_elements = ragged_components[i].values().NumElements(); diff --git a/tensorflow/core/kernels/ragged_tensor_to_sparse_kernel.cc b/tensorflow/core/kernels/ragged_tensor_to_sparse_kernel.cc index 39b530f4a15ead..336a38fa58fc8b 100644 --- a/tensorflow/core/kernels/ragged_tensor_to_sparse_kernel.cc +++ b/tensorflow/core/kernels/ragged_tensor_to_sparse_kernel.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -38,7 +39,8 @@ class RaggedTensorToSparseOp : public OpKernel { OP_REQUIRES_OK( context, context->input_list("rt_nested_splits", &rt_nested_splits_in)); const int rt_nested_splits_len = rt_nested_splits_in.size(); - DCHECK_GT(rt_nested_splits_len, 0); // Enforced by REGISTER_OP. 
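The NestedStackRaggedTensors hunk above requires every component's flat_values shape to match the first component once the leading dimension is dropped. A minimal sketch of that comparison on plain dimension vectors (illustrative, not the TensorShape API):

#include <cstddef>
#include <cstdint>
#include <vector>

// Shapes must agree exactly once the leading, stacked dimension is dropped,
// or the copy loop below the check would write out of bounds.
bool InnerDimsMatch(const std::vector<int64_t>& a,
                    const std::vector<int64_t>& b) {
  if (a.empty() || b.empty() || a.size() != b.size()) return false;
  for (std::size_t i = 1; i < a.size(); ++i)
    if (a[i] != b[i]) return false;  // compare everything but dim 0
  return true;
}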
+ OP_REQUIRES(context, rt_nested_splits_len > 0, + errors::InvalidArgument("rt_nested_splits must be non-empty")); std::vector rt_nested_splits; rt_nested_splits.reserve(rt_nested_splits_len); for (int i = 0; i < rt_nested_splits_len; ++i) { @@ -161,6 +163,14 @@ class RaggedTensorToSparseOp : public OpKernel { if (rt_nested_splits[i](0) != 0) { return InvalidArgument("First value of ragged splits must be 0."); } + for (int j = 1; j < rt_nested_splits[i].size(); ++j) { + if (rt_nested_splits[i](j) < rt_nested_splits[i](j - 1)) { + return InvalidArgument( + "Ragged splits should be non-decreasing, but we got ", + rt_nested_splits[i](j - 1), " followed by ", + rt_nested_splits[i](j)); + } + } if (i > 0) { SPLITS_TYPE last_split = rt_nested_splits[i - 1](rt_nested_splits[i - 1].size() - 1); diff --git a/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc b/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc index 28898c65ca7976..1749a6e24784d6 100644 --- a/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc +++ b/tensorflow/core/kernels/ragged_tensor_to_tensor_op.cc @@ -207,7 +207,7 @@ class RaggedTensorToTensorBaseOp : public OpKernel { DCHECK_EQ(result->size(), first_dimension); } - void CalculateOutputIndexRowSplit( + Status CalculateOutputIndexRowSplit( const RowPartitionTensor& row_split, const vector& parent_output_index, INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size, @@ -232,9 +232,11 @@ class RaggedTensorToTensorBaseOp : public OpKernel { result->push_back(-1); } } - if (row_split_size > 0) { - DCHECK_EQ(result->size(), row_split(row_split_size - 1)); + if (row_split_size > 0 && result->size() != row_split(row_split_size - 1)) { + return errors::InvalidArgument("Invalid row split size."); } + + return Status::OK(); } // Calculate the output index of the first element of a list.
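The splits validation above enforces that ragged row splits start at 0 and never decrease. As a standalone sketch (plain vectors standing in for the splits tensor):

#include <cstddef>
#include <cstdint>
#include <vector>

// Splits must start at 0 and be non-decreasing, otherwise a row would have
// negative length and downstream indexing reads out of bounds.
bool ValidRowSplits(const std::vector<int64_t>& splits) {
  if (splits.empty() || splits[0] != 0) return false;
  for (std::size_t j = 1; j < splits.size(); ++j)
    if (splits[j] < splits[j - 1]) return false;
  return true;
}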
@@ -258,7 +260,7 @@ class RaggedTensorToTensorBaseOp : public OpKernel { // result[6] = -1 because parent_output_index[value_rowids[6]] == -1 // result[7] = -1 because parent_output_index[value_rowids[6]] == -1 // result[8] = parent_output_index[value_rowids[7]] - void CalculateOutputIndexValueRowID( + Status CalculateOutputIndexValueRowID( const RowPartitionTensor& value_rowids, const vector& parent_output_index, INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size, @@ -266,12 +268,18 @@ class RaggedTensorToTensorBaseOp : public OpKernel { const INDEX_TYPE index_size = value_rowids.size(); result->reserve(index_size); if (index_size == 0) { - return; + return Status::OK(); } INDEX_TYPE current_output_column = 0; INDEX_TYPE current_value_rowid = value_rowids(0); - DCHECK_LT(current_value_rowid, parent_output_index.size()); + + if (current_value_rowid >= parent_output_index.size()) { + return errors::InvalidArgument( + "Got current_value_rowid=", current_value_rowid, + " which is not less than ", parent_output_index.size()); + } + INDEX_TYPE current_output_index = parent_output_index[current_value_rowid]; result->push_back(current_output_index); for (INDEX_TYPE i = 1; i < index_size; ++i) { @@ -288,12 +296,23 @@ class RaggedTensorToTensorBaseOp : public OpKernel { } else { current_output_column = 0; current_value_rowid = next_value_rowid; - DCHECK_LT(next_value_rowid, parent_output_index.size()); + + if (next_value_rowid >= parent_output_index.size()) { + return errors::InvalidArgument( + "Got next_value_rowid=", next_value_rowid, + " which is not less than ", parent_output_index.size()); + } + current_output_index = parent_output_index[next_value_rowid]; } result->push_back(current_output_index); } - DCHECK_EQ(result->size(), value_rowids.size()); + + if (result->size() != value_rowids.size()) { + return errors::InvalidArgument("Invalid row ids."); + } + + return Status::OK(); } Status CalculateOutputIndex(OpKernelContext* context, int dimension, @@ -306,15 +325,19 @@ class RaggedTensorToTensorBaseOp : public OpKernel { auto partition_type = GetRowPartitionTypeByDimension(dimension); switch (partition_type) { case RowPartitionType::VALUE_ROWIDS: - CalculateOutputIndexValueRowID( + return CalculateOutputIndexValueRowID( row_partition_tensor, parent_output_index, output_index_multiplier, output_size, result); - return tensorflow::Status::OK(); case RowPartitionType::ROW_SPLITS: - CalculateOutputIndexRowSplit(row_partition_tensor, parent_output_index, - output_index_multiplier, output_size, - result); - return tensorflow::Status::OK(); + if (row_partition_tensor.size() - 1 > parent_output_index.size()) { + return errors::InvalidArgument( + "Row partition size is greater than output size: ", + row_partition_tensor.size() - 1, " > ", + parent_output_index.size()); + } + return CalculateOutputIndexRowSplit( + row_partition_tensor, parent_output_index, output_index_multiplier, + output_size, result); default: return errors::InvalidArgument( "Unsupported partition type:", @@ -325,6 +348,9 @@ class RaggedTensorToTensorBaseOp : public OpKernel { Status GetFirstDimensionSize(OpKernelContext* context, INDEX_TYPE* result) { const Tensor first_partition_tensor = context->input(kFirstPartitionInputIndex); + if (row_partition_types_.empty()) { + return errors::InvalidArgument("No row_partition_types given."); + } const RowPartitionType first_partition_type = row_partition_types_[0]; switch (first_partition_type) { case RowPartitionType::FIRST_DIM_SIZE: @@ -345,6 +371,11 @@ class 
RaggedTensorToTensorBaseOp : public OpKernel { void Compute(OpKernelContext* context) override { INDEX_TYPE first_dimension; + const Tensor first_partition_tensor = + context->input(kFirstPartitionInputIndex); + OP_REQUIRES(context, first_partition_tensor.NumElements() > 0, + errors::InvalidArgument("Invalid first partition input. Tensor " + "requires at least one element.")); OP_REQUIRES_OK(context, GetFirstDimensionSize(context, &first_dimension)); vector output_size; OP_REQUIRES_OK(context, diff --git a/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc b/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc index 549dc68dfbf87c..ab86863e3a987f 100644 --- a/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc +++ b/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc @@ -157,8 +157,19 @@ class RaggedTensorToVariantOp : public OpKernel { return; } + // Checked here instead of at input in case batched_input_ is false + OP_REQUIRES(context, ragged_nested_splits_len > 0, + errors::InvalidArgument( + "rt_nested_splits must be a list of one or more, but " + "received rt_nested_splits of length 0.")); + // Unbatch the Ragged Tensor and encode the components. std::vector unbatched_ragged_input; + auto batched_splits_top_vec = + batched_ragged_input.splits(0).vec(); + int num_components = batched_splits_top_vec.size() - 1; + OP_REQUIRES(context, num_components >= 0, + errors::Internal("Invalid split argument.")); OP_REQUIRES_OK(context, UnbatchRaggedZerothDim( batched_ragged_input, &unbatched_ragged_input)); diff --git a/tensorflow/core/kernels/requantization_range_op.cc b/tensorflow/core/kernels/requantization_range_op.cc index cc6e891a6b352b..f6e217499d1983 100644 --- a/tensorflow/core/kernels/requantization_range_op.cc +++ b/tensorflow/core/kernels/requantization_range_op.cc @@ -46,6 +46,10 @@ class RequantizationRangeOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); + OP_REQUIRES(ctx, ctx->input(1).NumElements() > 0, + errors::InvalidArgument("Input min must not be empty.")); + OP_REQUIRES(ctx, ctx->input(2).NumElements() > 0, + errors::InvalidArgument("Input max must not be empty.")); const float input_min_float = ctx->input(1).flat()(0); const float input_max_float = ctx->input(2).flat()(0); Tensor* output_min = nullptr; diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc index 950a80b6b2dd30..2714233a746c3d 100644 --- a/tensorflow/core/kernels/resource_variable_ops.cc +++ b/tensorflow/core/kernels/resource_variable_ops.cc @@ -642,6 +642,11 @@ class ResourceGatherOp : public OpKernel { OP_REQUIRES( c, TensorShapeUtils::IsVectorOrHigher(params.shape()), errors::InvalidArgument("params must be at least 1 dimensional")); + OP_REQUIRES( + c, params.shape().dims() >= batch_dims_, + errors::InvalidArgument("params must have at least ", batch_dims_, + " (batch_dims) dimensions but it has shape ", + params.shape().DebugString())); // Check that we have enough index space const int64 N = indices.NumElements(); @@ -687,7 +692,8 @@ class ResourceGatherOp : public OpKernel { copy_functor(c->eigen_device(), tmp_indices.flat(), indices.flat()); - AddBatchOffsets(&tmp_indices, params); + AddBatchOffsets(c, &tmp_indices, params); + if (!c->status().ok()) return; op_indices = &tmp_indices; } @@ -719,11 +725,17 @@ class ResourceGatherOp : public OpKernel { // Example: batch_dims = 1, indices = [[0, 1, 2], [0, 1, 2]] // If indexing into a params dimension of size 4, then the indices 
will become // [0, 1, 2, 4, 5, 6] - void AddBatchOffsets(Tensor* indices, const Tensor& params) { + void AddBatchOffsets(OpKernelContext* ctx, Tensor* indices, + const Tensor& params) { int64 batch_size = 1; // The size of all batch dimensions. for (int idx = 0; idx < batch_dims_; ++idx) { batch_size *= params.dim_size(idx); } + OP_REQUIRES( + ctx, batch_size != 0, + errors::InvalidArgument( + "Inner size of indices would result in batch_size of 0 and a ", + "division by 0 in the implementation. This is illegal")); auto indices_flat = indices->flat(); int64 const index_inner_size = indices->NumElements() / batch_size; @@ -843,6 +855,35 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_ND_GPU); #undef REGISTER_GATHER_ND_ALL_INDICES #undef REGISTER_GATHER_ND_FULL +namespace { + +template +bool isCPUDevice() { + return false; +} + +template <> +bool isCPUDevice() { + return true; +} + +template +bool ValidateInput(const Tensor& updates) { + const auto updates_flat = updates.flat(); + const T zero(0); + for (int i = 0; i < updates.NumElements(); i++) { + if (updates_flat(i) == zero) return false; + } + return true; +} + +template <> +bool ValidateInput(const Tensor& updates) { + return true; +} + +} // namespace + template class ResourceScatterUpdateOp : public OpKernel { public: @@ -909,6 +950,12 @@ class ResourceScatterUpdateOp : public OpKernel { " indexing: ", params->dim_size(0), " > ", std::numeric_limits::max())); + // Prevent division by 0 + if (isCPUDevice() && op == tensorflow::scatter_op::UpdateOp::DIV) { + OP_REQUIRES(c, ValidateInput(updates), + errors::InvalidArgument("updates must not contain 0")); + } + if (N > 0) { auto indices_flat = indices.flat(); auto params_flat = params->flat_outer_dims(); @@ -925,11 +972,12 @@ class ResourceScatterUpdateOp : public OpKernel { params->dim_size(0), ")")); } else { int64 num_updates = updates.NumElements(); - OP_REQUIRES(c, num_updates % N == 0, - errors::InvalidArgument( - "shape of indices (", indices.shape().DebugString(), - ") is not compatible with the shape of updates (", - updates.shape().DebugString(), ")")); + OP_REQUIRES( + c, TensorShapeUtils::StartsWith(updates.shape(), indices.shape()), + errors::InvalidArgument( + "The shape of indices (", indices.shape().DebugString(), + ") must be a prefix of the shape of updates (", + updates.shape().DebugString(), ")")); auto updates_flat = updates.shaped({N, num_updates / N}); functor::ScatterFunctor functor; diff --git a/tensorflow/core/kernels/reverse_op.cc b/tensorflow/core/kernels/reverse_op.cc index 4b4aa05fc7be00..32cd5ccdedf2f6 100644 --- a/tensorflow/core/kernels/reverse_op.cc +++ b/tensorflow/core/kernels/reverse_op.cc @@ -155,6 +155,12 @@ class ReverseOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); + // If input is provided, check to make sure the first dimension is valid. + if (input.dims() > 0) { + OP_REQUIRES( + context, input.dim_size(0) != 0, + errors::InvalidArgument("Invalid input first dimension. 
Found 0.")); + } const Tensor& dims = context->input(1); if (TensorShapeUtils::IsScalar(input.shape())) { diff --git a/tensorflow/core/kernels/reverse_sequence_op.cc b/tensorflow/core/kernels/reverse_sequence_op.cc index b5b62bc76ca524..1282deb26e8cd6 100644 --- a/tensorflow/core/kernels/reverse_sequence_op.cc +++ b/tensorflow/core/kernels/reverse_sequence_op.cc @@ -115,6 +115,10 @@ class ReverseSequenceOp : public OpKernel { : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("batch_dim", &batch_dim_)); OP_REQUIRES_OK(context, context->GetAttr("seq_dim", &seq_dim_)); + OP_REQUIRES(context, batch_dim_ >= 0, + errors::InvalidArgument("Invalid batch_dim ", batch_dim_)); + OP_REQUIRES(context, seq_dim_ >= 0, + errors::InvalidArgument("Invalid seq_dim ", seq_dim_)); } void Compute(OpKernelContext* context) override { diff --git a/tensorflow/core/kernels/save_restore_tensor.cc b/tensorflow/core/kernels/save_restore_tensor.cc index 020e38c3462610..6cc3a28437feeb 100644 --- a/tensorflow/core/kernels/save_restore_tensor.cc +++ b/tensorflow/core/kernels/save_restore_tensor.cc @@ -151,11 +151,18 @@ void RestoreTensor(OpKernelContext* context, context, size == 1, errors::InvalidArgument( "Input 0 (file_pattern) must be a string scalar; got a tensor of ", - size, "elements")); + size, " elements")); } const string& file_pattern = file_pattern_t.flat()(0); const Tensor& tensor_name_t = context->input(1); + { + const int64_t size = tensor_name_t.NumElements(); + OP_REQUIRES(context, size > restore_index, + errors::InvalidArgument( + "Input 1 (file_pattern) must be a have at least ", + restore_index + 1, " elements")); + } const string& tensor_name = tensor_name_t.flat()(restore_index); // If we cannot find a cached reader we will allocate our own. diff --git a/tensorflow/core/kernels/save_restore_v2_ops.cc b/tensorflow/core/kernels/save_restore_v2_ops.cc index 44738d0f0cca3c..809a26030b850e 100644 --- a/tensorflow/core/kernels/save_restore_v2_ops.cc +++ b/tensorflow/core/kernels/save_restore_v2_ops.cc @@ -98,6 +98,7 @@ class SaveV2 : public OpKernel { const Tensor& shape_and_slices = context->input(2); ValidateInputs(true /* is save op */, context, prefix, tensor_names, shape_and_slices); + if (!context->status().ok()) return; const int kFixedInputs = 3; // Prefix, tensor names, shape_and_slices. 
const int num_tensors = static_cast(tensor_names.NumElements()); @@ -177,6 +178,7 @@ class RestoreV2 : public OpKernel { " expected dtypes.")); ValidateInputs(false /* not save op */, context, prefix, tensor_names, shape_and_slices); + if (!context->status().ok()) return; const string& prefix_string = prefix.scalar()(); diff --git a/tensorflow/core/kernels/sdca_internal.cc b/tensorflow/core/kernels/sdca_internal.cc index cbc754af0e9bb1..ed7149bf8365d8 100644 --- a/tensorflow/core/kernels/sdca_internal.cc +++ b/tensorflow/core/kernels/sdca_internal.cc @@ -99,6 +99,10 @@ Status ModelWeights::Initialize(OpKernelContext* const context) { OpInputList sparse_weights_inputs; TF_RETURN_IF_ERROR( context->input_list("sparse_weights", &sparse_weights_inputs)); + if (sparse_indices_inputs.size() != sparse_weights_inputs.size()) + return errors::InvalidArgument( + "sparse_indices and sparse_weights must have the same length, got ", + sparse_indices_inputs.size(), " and ", sparse_weights_inputs.size()); OpInputList dense_weights_inputs; TF_RETURN_IF_ERROR( context->input_list("dense_weights", &dense_weights_inputs)); @@ -106,10 +110,20 @@ Status ModelWeights::Initialize(OpKernelContext* const context) { OpOutputList sparse_weights_outputs; TF_RETURN_IF_ERROR(context->output_list("out_delta_sparse_weights", &sparse_weights_outputs)); + if (sparse_weights_outputs.size() != sparse_weights_inputs.size()) + return errors::InvalidArgument( + "out_delta_sparse_weights and sparse_weights must have the same " + "length, got ", + sparse_weights_outputs.size(), " and ", sparse_weights_inputs.size()); OpOutputList dense_weights_outputs; TF_RETURN_IF_ERROR( context->output_list("out_delta_dense_weights", &dense_weights_outputs)); + if (dense_weights_outputs.size() != dense_weights_inputs.size()) + return errors::InvalidArgument( + "out_delta_dense_weights and dense_weights must have the same length, " + "got ", + dense_weights_outputs.size(), " and ", dense_weights_inputs.size()); for (int i = 0; i < sparse_weights_inputs.size(); ++i) { Tensor* delta_t; @@ -327,13 +341,28 @@ Status Examples::Initialize(OpKernelContext* const context, OpInputList sparse_example_indices_inputs; TF_RETURN_IF_ERROR(context->input_list("sparse_example_indices", &sparse_example_indices_inputs)); + if (sparse_example_indices_inputs.size() != num_sparse_features) + return errors::InvalidArgument( + "Expected ", num_sparse_features, + " tensors in sparse_example_indices but got ", + sparse_example_indices_inputs.size()); OpInputList sparse_feature_indices_inputs; TF_RETURN_IF_ERROR(context->input_list("sparse_feature_indices", &sparse_feature_indices_inputs)); + if (sparse_feature_indices_inputs.size() != num_sparse_features) + return errors::InvalidArgument( + "Expected ", num_sparse_features, + " tensors in sparse_feature_indices but got ", + sparse_feature_indices_inputs.size()); OpInputList sparse_feature_values_inputs; if (num_sparse_features_with_values > 0) { TF_RETURN_IF_ERROR(context->input_list("sparse_feature_values", &sparse_feature_values_inputs)); + if (sparse_feature_values_inputs.size() != num_sparse_features_with_values) + return errors::InvalidArgument( + "Expected ", num_sparse_features_with_values, + " tensors in sparse_feature_values but got ", + sparse_feature_values_inputs.size()); } const Tensor* example_weights_t; @@ -351,6 +380,11 @@ Status Examples::Initialize(OpKernelContext* const context, const Tensor* example_labels_t; TF_RETURN_IF_ERROR(context->input("example_labels", &example_labels_t)); auto example_labels 
= example_labels_t->flat(); + if (example_labels.size() != num_examples) { + return errors::InvalidArgument("Expected ", num_examples, + " example labels but got ", + example_labels.size()); + } OpInputList dense_features_inputs; TF_RETURN_IF_ERROR( @@ -400,6 +434,13 @@ Status Examples::CreateSparseFeatureRepresentation( sparse_example_indices_inputs[i].template flat(); auto feature_indices = sparse_feature_indices_inputs[i].template flat(); + if (example_indices.size() != feature_indices.size()) { + mutex_lock l(mu); + result = errors::InvalidArgument( + "Found mismatched example_indices and feature_indices [", + example_indices, "] vs [", feature_indices, "]"); + return; + } // Parse features for each example. Features for a particular example // are at the offsets (start_id, end_id] diff --git a/tensorflow/core/kernels/searchsorted_op.cc b/tensorflow/core/kernels/searchsorted_op.cc index 01e221dc471c4d..5f075a6a540e9f 100644 --- a/tensorflow/core/kernels/searchsorted_op.cc +++ b/tensorflow/core/kernels/searchsorted_op.cc @@ -86,6 +86,10 @@ class UpperBoundOp : public OpKernel { const Tensor& sorted_inputs_t = ctx->input(0); const Tensor& values_t = ctx->input(1); + // inputs must be at least a matrix + OP_REQUIRES( + ctx, sorted_inputs_t.shape().dims() >= 2, + errors::InvalidArgument("sorted input argument must be a matrix")); // must have same batch dim_size for both OP_REQUIRES(ctx, sorted_inputs_t.dim_size(0) == values_t.dim_size(0), Status(error::INVALID_ARGUMENT, @@ -127,6 +131,10 @@ class LowerBoundOp : public OpKernel { const Tensor& sorted_inputs_t = ctx->input(0); const Tensor& values_t = ctx->input(1); + // inputs must be at least a matrix + OP_REQUIRES( + ctx, sorted_inputs_t.shape().dims() >= 2, + errors::InvalidArgument("sorted input argument must be a matrix")); // must have same batch dim_size for both OP_REQUIRES(ctx, sorted_inputs_t.dim_size(0) == values_t.dim_size(0), Status(error::INVALID_ARGUMENT, diff --git a/tensorflow/core/kernels/segment_reduction_ops_impl.h b/tensorflow/core/kernels/segment_reduction_ops_impl.h index 7cf15ef5b72d72..d9accdd7ea816a 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_impl.h +++ b/tensorflow/core/kernels/segment_reduction_ops_impl.h @@ -111,7 +111,9 @@ class SegmentReductionOp : public OpKernel { errors::InvalidArgument("segment ids must be >= 0")); TensorShape output_shape = input.shape(); - output_shape.set_dim(0, output_rows); + // Since we're changing the first dimension of the shape, we need to make + // sure the new shape won't overflow. + OP_REQUIRES_OK(context, output_shape.SetDimWithStatus(0, output_rows)); // Note that we do not initialize the output buffer with a default value, so // we need to explicitly set missing indices to the default value. @@ -284,7 +286,10 @@ class SegmentSumGPUOp : public AsyncOpKernel { done); TensorShape output_shape = input.shape(); - output_shape.set_dim(0, output_rows); + // Since we're changing the first dimension of the shape, we need to make + // sure the new shape won't overflow. 
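SetDimWithStatus, used above in place of set_dim, revalidates the total element count after resizing dimension 0. A rough standalone sketch of why (assuming the product of the remaining dims already fits in int64; the real TensorShape bookkeeping differs):

#include <cstdint>
#include <limits>
#include <vector>

// Resizing dim 0 rescales the total element count, and with a hostile
// segment count the product can overflow int64, which is what
// SetDimWithStatus reports as a non-OK Status. Returns false on overflow.
bool TrySetDim0(std::vector<int64_t>* shape, int64_t output_rows) {
  if (shape->empty() || output_rows < 0) return false;
  uint64_t inner = 1;  // elements per row: product of the remaining dims
  for (std::size_t i = 1; i < shape->size(); ++i)
    inner *= static_cast<uint64_t>((*shape)[i]);
  const uint64_t limit =
      static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
  if (inner != 0 && static_cast<uint64_t>(output_rows) > limit / inner)
    return false;  // new total element count would overflow int64
  (*shape)[0] = output_rows;
  return true;
}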
+ OP_REQUIRES_OK_ASYNC(context, + output_shape.SetDimWithStatus(0, output_rows), done); Tensor* output = nullptr; OP_REQUIRES_OK_ASYNC( diff --git a/tensorflow/core/kernels/sequence_ops.cc b/tensorflow/core/kernels/sequence_ops.cc index d15f95125e0ac3..ebf31d61e5f85b 100644 --- a/tensorflow/core/kernels/sequence_ops.cc +++ b/tensorflow/core/kernels/sequence_ops.cc @@ -71,13 +71,17 @@ class RangeOp : public OpKernel { errors::InvalidArgument( "Requires start >= limit when delta < 0: ", start, "/", limit)); } - int64 size = (std::is_integral::value - ? ((std::abs(limit - start) + std::abs(delta) - 1) / - std::abs(delta)) - : std::ceil(std::abs((limit - start) / delta))); + int64 size = 0; + if (std::is_integral::value) { + size = static_cast( + (std::abs(limit - start) + std::abs(delta) - 1) / std::abs(delta)); + } else { + size = static_cast(std::ceil(std::abs((limit - start) / delta))); + } + TensorShape shape; + OP_REQUIRES_OK(context, shape.AddDimWithStatus(size)); Tensor* out = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(0, TensorShape({size}), &out)); + OP_REQUIRES_OK(context, context->allocate_output(0, shape, &out)); auto flat = out->flat(); T val = start; for (int64 i = 0; i < size; ++i) { diff --git a/tensorflow/core/kernels/session_ops.cc b/tensorflow/core/kernels/session_ops.cc index ee81ad27632622..109acc55722ccb 100644 --- a/tensorflow/core/kernels/session_ops.cc +++ b/tensorflow/core/kernels/session_ops.cc @@ -91,7 +91,6 @@ TF_CALL_NUMBER_TYPES(REGISTER_GPU_KERNEL); REGISTER_GPU_KERNEL(bool); #undef REGISTER_GPU_KERNEL - class GetSessionTensorOp : public OpKernel { public: explicit GetSessionTensorOp(OpKernelConstruction* context) @@ -101,7 +100,11 @@ class GetSessionTensorOp : public OpKernel { const Tensor& handle = ctx->input(0); const string& name = handle.scalar()(); Tensor val; - OP_REQUIRES_OK(ctx, ctx->session_state()->GetTensor(name, &val)); + auto session_state = ctx->session_state(); + OP_REQUIRES(ctx, session_state != nullptr, + errors::FailedPrecondition( + "GetSessionTensor called on null session state")); + OP_REQUIRES_OK(ctx, session_state->GetTensor(name, &val)); ctx->set_output(0, val); } @@ -122,7 +125,6 @@ TF_CALL_NUMBER_TYPES(REGISTER_GPU_KERNEL); REGISTER_GPU_KERNEL(bool); #undef REGISTER_GPU_KERNEL - class DeleteSessionTensorOp : public OpKernel { public: explicit DeleteSessionTensorOp(OpKernelConstruction* context) @@ -131,7 +133,11 @@ class DeleteSessionTensorOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& handle = ctx->input(0); const string& name = handle.scalar()(); - OP_REQUIRES_OK(ctx, ctx->session_state()->DeleteTensor(name)); + auto session_state = ctx->session_state(); + OP_REQUIRES(ctx, session_state != nullptr, + errors::FailedPrecondition( + "DeleteSessionTensor called on null session state")); + OP_REQUIRES_OK(ctx, session_state->DeleteTensor(name)); } TF_DISALLOW_COPY_AND_ASSIGN(DeleteSessionTensorOp); diff --git a/tensorflow/core/kernels/sparse/kernels.cc b/tensorflow/core/kernels/sparse/kernels.cc index 0eea9f1feed5c3..dff9aeb83ccfec 100644 --- a/tensorflow/core/kernels/sparse/kernels.cc +++ b/tensorflow/core/kernels/sparse/kernels.cc @@ -22,6 +22,7 @@ limitations under the License. 
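For the RangeOp change above, the integral branch computes ceil(|limit - start| / |delta|) in integer arithmetic before the new AddDimWithStatus guard sizes the output. A worked standalone version:

#include <cstdlib>
#include <iostream>

// Illustrative only. start=0, limit=10, delta=3 gives
// (|10 - 0| + |3| - 1) / |3| = 12 / 3 = 4 elements: {0, 3, 6, 9}.
long long RangeSizeInt(long long start, long long limit, long long delta) {
  return (std::llabs(limit - start) + std::llabs(delta) - 1) /
         std::llabs(delta);
}

int main() { std::cout << RangeSizeInt(0, 10, 3) << "\n"; }  // prints 4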
#include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/errors.h" namespace tensorflow { namespace functor { @@ -63,6 +64,11 @@ Status SparseTensorToCSRSparseMatrixCPUFunctor::operator()( for (int64 i = 0; i < total_nnz; ++i) { // For now, the rows pointers store the corresponding row counts. + int64 ix = indices(i, 0) + 1; + if (ix >= csr_row_ptr.size()) { + return errors::InvalidArgument("Got an index ", ix, + " that is outside of csr_row_ptr"); + } csr_row_ptr(indices(i, 0) + 1) += 1; csr_col_ind(i) = indices(i, 1); } diff --git a/tensorflow/core/kernels/sparse/sparse_cholesky_op.cc b/tensorflow/core/kernels/sparse/sparse_cholesky_op.cc index 9a939276f0b6cb..47ab252317de5e 100644 --- a/tensorflow/core/kernels/sparse/sparse_cholesky_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_cholesky_op.cc @@ -17,6 +17,8 @@ limitations under the License. #include #include +#include "tensorflow/core/framework/op_requires.h" + #define EIGEN_USE_THREADS #include "third_party/eigen3/Eigen/Core" @@ -82,8 +84,8 @@ class CSRSparseCholeskyCPUOp : public OpKernel { int64 num_rows; int batch_size; - ValidateInputs(ctx, *input_matrix, input_permutation_indices, &batch_size, - &num_rows); + OP_REQUIRES_OK(ctx, ValidateInputs(*input_matrix, input_permutation_indices, + &batch_size, &num_rows)); // Allocate batch pointers. Tensor batch_ptr(cpu_allocator(), DT_INT32, TensorShape({batch_size + 1})); @@ -226,49 +228,48 @@ class CSRSparseCholeskyCPUOp : public OpKernel { } private: - void ValidateInputs(OpKernelContext* ctx, - const CSRSparseMatrix& sparse_matrix, - const Tensor& permutation_indices, int* batch_size, - int64* num_rows) { - OP_REQUIRES(ctx, sparse_matrix.dtype() == DataTypeToEnum::value, - errors::InvalidArgument( - "Asked for a CSRSparseMatrix of type ", - DataTypeString(DataTypeToEnum::value), - " but saw dtype: ", DataTypeString(sparse_matrix.dtype()))); + Status ValidateInputs(const CSRSparseMatrix& sparse_matrix, + const Tensor& permutation_indices, int* batch_size, + int64* num_rows) { + if (sparse_matrix.dtype() != DataTypeToEnum::value) + return errors::InvalidArgument( + "Asked for a CSRSparseMatrix of type ", + DataTypeString(DataTypeToEnum::value), + " but saw dtype: ", DataTypeString(sparse_matrix.dtype())); const Tensor& dense_shape = sparse_matrix.dense_shape(); const int rank = dense_shape.dim_size(0); - OP_REQUIRES(ctx, rank == 2 || rank == 3, - errors::InvalidArgument("sparse matrix must have rank 2 or 3; ", - "but dense_shape has size ", rank)); + if (rank < 2 || rank > 3) + return errors::InvalidArgument("sparse matrix must have rank 2 or 3; ", + "but dense_shape has size ", rank); const int row_dim = (rank == 2) ? 
0 : 1; auto dense_shape_vec = dense_shape.vec(); *num_rows = dense_shape_vec(row_dim); const int64 num_cols = dense_shape_vec(row_dim + 1); - OP_REQUIRES(ctx, *num_rows == num_cols, - errors::InvalidArgument("sparse matrix must be square; got: ", - *num_rows, " != ", num_cols)); + if (*num_rows != num_cols) + return errors::InvalidArgument( + "sparse matrix must be square; got: ", *num_rows, " != ", num_cols); const TensorShape& perm_shape = permutation_indices.shape(); - OP_REQUIRES( - ctx, perm_shape.dims() + 1 == rank, - errors::InvalidArgument( - "sparse matrix must have the same rank as permutation; got: ", rank, - " != ", perm_shape.dims(), " + 1.")); - OP_REQUIRES( - ctx, perm_shape.dim_size(rank - 2) == *num_rows, - errors::InvalidArgument( - "permutation must have the same number of elements in each batch " - "as the number of rows in sparse matrix; got: ", - perm_shape.dim_size(rank - 2), " != ", *num_rows)); + if (perm_shape.dims() + 1 != rank) + return errors::InvalidArgument( + "sparse matrix must have the same rank as permutation; got: ", rank, + " != ", perm_shape.dims(), " + 1."); + if (perm_shape.dim_size(rank - 2) != *num_rows) + return errors::InvalidArgument( + "permutation must have the same number of elements in each batch " + "as the number of rows in sparse matrix; got: ", + perm_shape.dim_size(rank - 2), " != ", *num_rows); *batch_size = sparse_matrix.batch_size(); if (*batch_size > 1) { - OP_REQUIRES( - ctx, perm_shape.dim_size(0) == *batch_size, - errors::InvalidArgument("permutation must have the same batch size " - "as sparse matrix; got: ", - perm_shape.dim_size(0), " != ", *batch_size)); + if (perm_shape.dim_size(0) != *batch_size) + return errors::InvalidArgument( + "permutation must have the same batch size " + "as sparse matrix; got: ", + perm_shape.dim_size(0), " != ", *batch_size); } + + return Status::OK(); } }; diff --git a/tensorflow/core/kernels/sparse_add_op.cc b/tensorflow/core/kernels/sparse_add_op.cc index 0cf40a709a39a7..2bd05fa41adc26 100644 --- a/tensorflow/core/kernels/sparse_add_op.cc +++ b/tensorflow/core/kernels/sparse_add_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_util.h" @@ -43,6 +44,11 @@ class SparseAddOp : public OpKernel { b_indices->shape().DebugString())); const int64 a_nnz = a_indices->dim_size(0); const int64 b_nnz = b_indices->dim_size(0); + const int num_dims = a_indices->dim_size(1); + OP_REQUIRES(ctx, b_indices->dim_size(1) == num_dims, + errors::InvalidArgument( + "Input indices must have the same dimension, got ", + num_dims, " and ", b_indices->dim_size(1))); OP_REQUIRES_OK(ctx, ctx->input("a_values", &a_values_t)); OP_REQUIRES_OK(ctx, ctx->input("b_values", &b_values_t)); @@ -71,6 +77,13 @@ class SparseAddOp : public OpKernel { "Input shapes should be a vector but received shapes ", a_shape->shape().DebugString(), " and ", b_shape->shape().DebugString())); + OP_REQUIRES( + ctx, a_shape->NumElements() == num_dims, + errors::InvalidArgument("Second dimension of a_indices and length of " + "a_shape must match, got ", + num_dims, " and ", a_shape->NumElements())); + OP_REQUIRES(ctx, num_dims > 0, + errors::InvalidArgument("Tensors must not be empty")); OP_REQUIRES( ctx, a_shape->IsSameSize(*b_shape), errors::InvalidArgument( @@ -99,7 +112,6 @@ class SparseAddOp : public OpKernel { std::vector> entries_to_copy; // from_a?, idx entries_to_copy.reserve(a_nnz + b_nnz); std::vector out_values; - const int num_dims = a_shape->dim_size(0); // The input and output sparse tensors are assumed to be ordered along // increasing dimension number. diff --git a/tensorflow/core/kernels/sparse_concat_op.cc b/tensorflow/core/kernels/sparse_concat_op.cc index 3b2a0cb0f34ed3..d49f92ea556eb2 100644 --- a/tensorflow/core/kernels/sparse_concat_op.cc +++ b/tensorflow/core/kernels/sparse_concat_op.cc @@ -27,6 +27,7 @@ limitations under the License.
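The SparseAddOp hunk above pins down the rank agreement between indices, values, and shapes before num_dims is used to walk index columns. A compact restatement of the invariants (standalone, illustrative):

#include <cstdint>

// For a sparse operand with n nonzeros and rank r: indices is [n, r],
// values is [n], shape is [r], and both operands must share the same
// positive r.
bool ConsistentSparseOperands(int64_t a_index_cols, int64_t b_index_cols,
                              int64_t a_shape_len) {
  return a_index_cols == b_index_cols &&  // both operands have equal rank
         a_shape_len == a_index_cols &&   // shape length matches index width
         a_index_cols > 0;                // rank-0 operands are rejected
}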
#include "tensorflow/core/framework/tensor_util.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/util/overflow.h" #include "tensorflow/core/util/sparse/sparse_tensor.h" namespace tensorflow { @@ -66,13 +67,32 @@ class SparseConcatOp : public OpKernel { OP_REQUIRES(context, shapes.size() == N, errors::InvalidArgument("Expected ", N, " input shapes, got ", shapes.size())); + bool overflow_ocurred = false; for (int i = 0; i < N; i++) { + int64 new_num_elements = 1; OP_REQUIRES(context, TensorShapeUtils::IsVector(shapes[i].shape()), errors::InvalidArgument( "Input shapes should be a vector but received shape ", shapes[i].shape().DebugString(), " at position ", i)); + auto input_shape_vector = shapes[i].vec(); + for (int j = 0; j < input_shape_vector.size(); j++) { + new_num_elements = + MultiplyWithoutOverflow(new_num_elements, input_shape_vector(j)); + if (new_num_elements < 0) { + overflow_ocurred = true; + break; + } + } + + if (overflow_ocurred) { + break; + } } + OP_REQUIRES( + context, !overflow_ocurred, + errors::Internal("Encountered overflow from large input shape.")); + const TensorShape input_shape(shapes[0].vec()); const int input_rank = input_shape.dims(); const int concat_dim = (concat_dim_attr_ < 0) diff --git a/tensorflow/core/kernels/sparse_cross_op.cc b/tensorflow/core/kernels/sparse_cross_op.cc index 583235b4a309b8..43b3bedc745032 100644 --- a/tensorflow/core/kernels/sparse_cross_op.cc +++ b/tensorflow/core/kernels/sparse_cross_op.cc @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/fingerprint.h" @@ -460,10 +461,19 @@ int64 CalculateBatchSize(const OpInputList& shapes_list_in, Status ValidateInput(const OpInputList& indices_list_in, const OpInputList& values_list_in, const OpInputList& shapes_list_in, - const OpInputList& dense_list_in) { + const OpInputList& dense_list_in, + const DataType& internal_type) { const auto size = indices_list_in.size(); + // Only perform internal_type check for SparseCrossOp. + // Check if the internal_type is not invalid before doing so. + bool check_type = internal_type != DT_INVALID; // Validates indices_list_in OpInputList. for (int i = 0; i < size; i++) { + if (check_type && indices_list_in[i].dtype() != DT_INT64) { + return errors::InvalidArgument("Input indices should be of type ", + DT_INT64, " but received ", + indices_list_in[i].dtype()); + } if (!TensorShapeUtils::IsMatrix(indices_list_in[i].shape())) { return errors::InvalidArgument( "Input indices should be a matrix but received shape ", @@ -482,6 +492,14 @@ Status ValidateInput(const OpInputList& indices_list_in, values_list_in.size()); } for (int i = 0; i < size; i++) { + // Make sure to avoid the expected type to be string, but input values to be + // int64. 
+ if (check_type && internal_type == DT_STRING && + values_list_in[i].dtype() == DT_INT64) { + return errors::InvalidArgument("Input values should be of internal type ", + internal_type, " but received ", + values_list_in[i].dtype()); + } if (!TensorShapeUtils::IsVector(values_list_in[i].shape())) { return errors::InvalidArgument( "Input values should be a vector but received shape ", @@ -502,6 +520,11 @@ Status ValidateInput(const OpInputList& indices_list_in, shapes_list_in.size()); } for (int i = 0; i < size; i++) { + if (check_type && shapes_list_in[i].dtype() != DT_INT64) { + return errors::InvalidArgument("Input shape should be of type ", DT_INT64, + " but received ", + shapes_list_in[i].dtype()); + } if (!TensorShapeUtils::IsVector(shapes_list_in[i].shape())) { return errors::InvalidArgument( "Input shapes should be a vector but received shape ", @@ -517,6 +540,14 @@ Status ValidateInput(const OpInputList& indices_list_in, // Validates dense_list_in OpInputList for (int i = 0; i < dense_list_in.size(); ++i) { + // Make sure to avoid the expected type to be string, but input values to be + // int64. + if (check_type && internal_type == DT_STRING && + dense_list_in[i].dtype() == DT_INT64) { + return errors::InvalidArgument("Dense inputs should be of internal type ", + internal_type, " but received ", + dense_list_in[i].dtype()); + } if (!TensorShapeUtils::IsMatrix(dense_list_in[i].shape())) { return errors::InvalidArgument( "Dense inputs should be a matrix but received shape ", @@ -698,6 +729,7 @@ class SparseCrossOp : public OpKernel { int64 signed_hash_key_; OP_REQUIRES_OK(context, context->GetAttr("hash_key", &signed_hash_key_)); hash_key_ = static_cast(signed_hash_key_); + OP_REQUIRES_OK(context, context->GetAttr("internal_type", &internal_type_)); } void Compute(OpKernelContext* context) override { @@ -711,8 +743,10 @@ class SparseCrossOp : public OpKernel { OP_REQUIRES_OK(context, context->input_list("dense_inputs", &dense_list_in)); - OP_REQUIRES_OK(context, ValidateInput(indices_list_in, values_list_in, - shapes_list_in, dense_list_in)); + DataType internal_type = internal_type_; + OP_REQUIRES_OK( + context, ValidateInput(indices_list_in, values_list_in, shapes_list_in, + dense_list_in, internal_type)); std::vector>> columns = GenerateColumnsFromInput(indices_list_in, values_list_in, @@ -756,6 +790,7 @@ class SparseCrossOp : public OpKernel { private: int64 num_buckets_; uint64 hash_key_; + DataType internal_type_; }; class SparseCrossV2Op : public OpKernel { @@ -773,8 +808,11 @@ class SparseCrossV2Op : public OpKernel { OP_REQUIRES_OK(context, context->input_list("dense_inputs", &dense_list_in)); - OP_REQUIRES_OK(context, ValidateInput(indices_list_in, values_list_in, - shapes_list_in, dense_list_in)); + // Set internal_type to invalid_type so that the check will be ignored. + DataType internal_type = DT_INVALID; + OP_REQUIRES_OK( + context, ValidateInput(indices_list_in, values_list_in, shapes_list_in, + dense_list_in, internal_type)); const Tensor* sep_t; OP_REQUIRES_OK(context, context->input("sep", &sep_t)); @@ -832,8 +870,11 @@ class SparseCrossHashedOp : public OpKernel { OP_REQUIRES_OK(context, context->input_list("dense_inputs", &dense_list_in)); - OP_REQUIRES_OK(context, ValidateInput(indices_list_in, values_list_in, - shapes_list_in, dense_list_in)); + // Set internal_type to invalid_type so that the check will be ignored. 
+ DataType internal_type = DT_INVALID; + OP_REQUIRES_OK( + context, ValidateInput(indices_list_in, values_list_in, shapes_list_in, + dense_list_in, internal_type)); const Tensor* num_buckets_t; OP_REQUIRES_OK(context, context->input("num_buckets", &num_buckets_t)); diff --git a/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc b/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc index 3a5e66a0e73ea6..edc238faa98db9 100644 --- a/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc +++ b/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc @@ -78,6 +78,11 @@ class SparseDenseBinaryOpShared : public OpKernel { "but received shapes: ", values_t->shape().DebugString(), " and ", shape_t->shape().DebugString())); + OP_REQUIRES( + ctx, values_t->dim_size(0) == indices_t->dim_size(0), + errors::InvalidArgument( + "The first dimension of values and indices should match. (", + values_t->dim_size(0), " vs. ", indices_t->dim_size(0), ")")); const auto indices_mat = indices_t->matrix(); const auto shape_vec = shape_t->vec(); @@ -109,7 +114,10 @@ class SparseDenseBinaryOpShared : public OpKernel { OP_REQUIRES_OK( ctx, ctx->allocate_temp(DataTypeToEnum::value, TensorShape({nnz}), &dense_gathered)); - + bool op_is_div = false; + if (absl::StrContains(ctx->op_kernel().type_string_view(), "Div")) { + op_is_div = true; + } // Pulls relevant entries from the dense side, with reshape and broadcasting // *of the dense side* taken into account. Use a TensorRef to avoid blowing // up memory. @@ -138,6 +146,12 @@ class SparseDenseBinaryOpShared : public OpKernel { errors::InvalidArgument("Provided indices are out-of-bounds w.r.t. " \ "dense side with broadcasted shape")); \ dense_gathered_flat(i) = rhs_ref.coeff(idx); \ + if (op_is_div) { \ + OP_REQUIRES(ctx, dense_gathered_flat(i) != T(0), \ + errors::InvalidArgument( \ + "SparseDenseCwiseDiv cannot divide by zero, " \ + "but input dense tensor contains zero")); \ + } \ } \ break; \ } diff --git a/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc b/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc index 6d9201d9f87660..243612afe4d331 100644 --- a/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc +++ b/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc @@ -22,11 +22,13 @@ limitations under the License. #include #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_util.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/util/sparse/sparse_tensor.h" namespace tensorflow { @@ -64,12 +66,18 @@ class SparseFillEmptyRowsOp : public OpKernel { OP_REQUIRES(context, TensorShapeUtils::IsVector(values_t.shape()), errors::InvalidArgument("values must be a vector, saw: ", values_t.shape().DebugString())); + OP_REQUIRES(context, indices_t.dim_size(0) == values_t.dim_size(0), + errors::InvalidArgument("The length of `values` (", values_t.dim_size(0), + ") must match the first dimension of `indices` (", + indices_t.dim_size(0), ").")); OP_REQUIRES(context, TensorShapeUtils::IsScalar(default_value_t.shape()), errors::InvalidArgument("default_value must be a scalar, saw: ", default_value_t.shape().DebugString())); // TODO(ebrevdo): add shape checks between values, indices, // dense_shape. Also add check that dense rank > 0.
- + // Also add check that dense rank > 0. + OP_REQUIRES(context, dense_shape_t.NumElements() != 0, + errors::InvalidArgument("Dense shape cannot be empty.")); const T& default_value = default_value_t.scalar()(); const auto indices = indices_t.matrix(); const auto values = values_t.vec(); diff --git a/tensorflow/core/kernels/sparse_matmul_op.cc b/tensorflow/core/kernels/sparse_matmul_op.cc index f5747854093c95..6bf9dfa3d8bb75 100644 --- a/tensorflow/core/kernels/sparse_matmul_op.cc +++ b/tensorflow/core/kernels/sparse_matmul_op.cc @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/kernels/fill_functor.h" #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" @@ -980,9 +981,18 @@ class SparseMatMulOp : public OpKernel { errors::InvalidArgument( "Matrix size incompatible: a: ", a.shape().DebugString(), ", b: ", b.shape().DebugString())); + OP_REQUIRES(ctx, m >= 0 && n >= 0 && k >= 0, + errors::InvalidArgument( + "Matrix dimensions cannot be negative: a: ", + a.shape().DebugString(), ", b: ", b.shape().DebugString())); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({m, n}), &output)); + // Return early if at least one of the output dimensions is 0. + if (m == 0 || n == 0) { + return; + } + if (k == 0) { // If the inner dimension k in the matrix multiplication is zero, we fill // the output with zeros. @@ -1039,6 +1049,10 @@ class SparseMatMulOp : public OpKernel { if (transpose_b) { // TODO(agarwal): avoid transposing the matrix here and directly handle // transpose in CreateDenseSlices. + OP_REQUIRES(ctx, right->dim_size(0) != 0, + errors::InvalidArgument("b has an entry 0 in its shape.")); + OP_REQUIRES(ctx, right->dim_size(1) != 0, + errors::InvalidArgument("b has an entry 0 in its shape.")); right_tr.reset( new Tensor(right->dtype(), TensorShape({right->dim_size(1), right->dim_size(0)}))); diff --git a/tensorflow/core/kernels/sparse_reduce_op.cc b/tensorflow/core/kernels/sparse_reduce_op.cc index b65f31e5eb174e..eb56b7390b0f9a 100644 --- a/tensorflow/core/kernels/sparse_reduce_op.cc +++ b/tensorflow/core/kernels/sparse_reduce_op.cc @@ -219,7 +219,20 @@ class SparseReduceOp : public OpKernel { sp.Reorder(reduction.reorder_dims); for (const auto &g : sp.group(reduction.group_by_dims)) { Op::template Run(ctx, reduced_val, g.template values()); + OP_REQUIRES(ctx, + output_strides.empty() || + (g.group().size() == output_strides.size()), + errors::Internal( + "Expected group size and output_strides size to match", + ", but got ", g.group().size(), " and ", + output_strides.size())); const int64 idx = CoordinatesToFlatIndex(g.group(), output_strides); + OP_REQUIRES(ctx, + idx >= 0 && idx < out_flat.size(), + errors::Internal( + "Obtained a write index of ", idx, + " which is outside of bounds of [0, ", + out_flat.size(), ")")); out_flat(idx) = reduced_val(); VLOG(2) << "coords: " << absl::StrJoin(g.group(), ",") << "; idx: " << idx << "; group " << Op::Name() << ": " diff --git a/tensorflow/core/kernels/sparse_reshape_op.cc b/tensorflow/core/kernels/sparse_reshape_op.cc index 6eb5f0af635c28..c3b1932a1735b0 100644 --- a/tensorflow/core/kernels/sparse_reshape_op.cc +++ b/tensorflow/core/kernels/sparse_reshape_op.cc @@ -26,6 +26,7 @@ limitations under the License.
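The SparseReduceOp hunk above stops trusting CoordinatesToFlatIndex output and bounds-checks the write index. The same guard as a standalone sketch:

#include <cstddef>
#include <cstdint>
#include <vector>

// The flat output index is the dot product of group coordinates and output
// strides; a corrupted group could otherwise land outside [0, out_size).
bool SafeFlatIndex(const std::vector<int64_t>& coords,
                   const std::vector<int64_t>& strides, int64_t out_size,
                   int64_t* idx) {
  if (coords.size() != strides.size()) return false;
  int64_t flat = 0;
  for (std::size_t i = 0; i < coords.size(); ++i)
    flat += coords[i] * strides[i];
  if (flat < 0 || flat >= out_size) return false;
  *idx = flat;
  return true;
}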
#include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/reshape_util.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -34,6 +35,17 @@ class SparseReshapeOp : public OpKernel { explicit SparseReshapeOp(OpKernelConstruction* context) : OpKernel(context) {} void Compute(OpKernelContext* context) override { + const Tensor& input_indices_in = context->input(0); + const Tensor& input_shape_in = context->input(1); + + OP_REQUIRES(context, TensorShapeUtils::IsMatrix(input_indices_in.shape()), + errors::InvalidArgument("Input must be a matrix.")); + OP_REQUIRES(context, TensorShapeUtils::IsVector(input_shape_in.shape()), + errors::InvalidArgument("Input shape must be a vector.")); + OP_REQUIRES(context, + input_indices_in.dim_size(1) == input_shape_in.dim_size(0), + errors::InvalidArgument( + "Input tensor rank must match input shape length.")); ReshapeSparseTensor(context, context->input(0), context->input(1), context->input(2), 0 /* output indices index */, 1 /* output shape index */); diff --git a/tensorflow/core/kernels/sparse_sparse_binary_op_shared.cc b/tensorflow/core/kernels/sparse_sparse_binary_op_shared.cc index 43dc9ae70cd627..eb993a5965043b 100644 --- a/tensorflow/core/kernels/sparse_sparse_binary_op_shared.cc +++ b/tensorflow/core/kernels/sparse_sparse_binary_op_shared.cc @@ -150,6 +150,7 @@ class SparseSparseBinaryOpShared : public OpKernel { const int64 a_nnz = a_indices_t->dim_size(0); const int64 b_nnz = b_indices_t->dim_size(0); + const auto a_values = a_values_t->vec(); const auto b_values = b_values_t->vec(); @@ -166,6 +167,14 @@ class SparseSparseBinaryOpShared : public OpKernel { "Input shapes should be a vector but received shapes ", a_shape_t->shape().DebugString(), " and ", b_shape_t->shape().DebugString())); + const int num_dims = a_indices_t->dim_size(1); + OP_REQUIRES( + ctx, a_shape_t->NumElements() == num_dims, + errors::InvalidArgument("Second dimension of a_indices and length of " + "a_shape must match, got ", + num_dims, " and ", a_shape_t->NumElements())); + OP_REQUIRES(ctx, num_dims > 0, + errors::InvalidArgument("Tensors must not be empty")); OP_REQUIRES(ctx, a_shape_t->IsSameSize(*b_shape_t), errors::InvalidArgument( "Operands do not have the same ranks; got shapes: ", @@ -180,7 +189,6 @@ class SparseSparseBinaryOpShared : public OpKernel { " for dimension ", i)); } - const int num_dims = a_indices_t->dim_size(1); const auto a_indices_mat = a_indices_t->matrix(); const auto b_indices_mat = b_indices_t->matrix(); std::vector a_augmented_values, b_augmented_values; diff --git a/tensorflow/core/kernels/sparse_split_op.cc b/tensorflow/core/kernels/sparse_split_op.cc index 5c0457aa956c85..ece6e832e86434 100644 --- a/tensorflow/core/kernels/sparse_split_op.cc +++ b/tensorflow/core/kernels/sparse_split_op.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/util/overflow.h" #include "tensorflow/core/util/sparse/sparse_tensor.h" namespace tensorflow { @@ -63,6 +64,16 @@ class SparseSplitOp : public OpKernel { input_shape.vec()(axis), "), got ", num_split_)); + // Prevent overflow by constructing the dense shape separately + int64 total_elements = 1; + const auto input_shape_flat = input_shape.flat(); + for (int i = 0; i < input_shape.NumElements(); i++) { + total_elements = + MultiplyWithoutOverflow(total_elements, input_shape_flat(i)); + OP_REQUIRES(context, total_elements >= 0, + errors::Internal("Encountered overflow in dense shape")); + } + sparse::SparseTensor sparse_tensor; OP_REQUIRES_OK(context, sparse::SparseTensor::Create( diff --git a/tensorflow/core/kernels/sparse_tensors_map_ops.cc b/tensorflow/core/kernels/sparse_tensors_map_ops.cc index c2c0e43ca2ba8d..5ea5fca544d3e9 100644 --- a/tensorflow/core/kernels/sparse_tensors_map_ops.cc +++ b/tensorflow/core/kernels/sparse_tensors_map_ops.cc @@ -21,9 +21,6 @@ limitations under the License. #include #include -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" - #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/resource_mgr.h" @@ -31,6 +28,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_util.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/util/overflow.h" #include "tensorflow/core/util/sparse/sparse_tensor.h" namespace tensorflow { @@ -254,7 +252,22 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp { errors::InvalidArgument( "Rank of input SparseTensor should be > 1, but saw rank: ", rank)); - TensorShape tensor_input_shape(input_shape->vec()); + auto input_shape_vec = input_shape->vec(); + int64 new_num_elements = 1; + bool overflow_occurred = false; + for (int i = 0; i < input_shape_vec.size(); i++) { + new_num_elements = + MultiplyWithoutOverflow(new_num_elements, input_shape_vec(i)); + if (new_num_elements < 0) { + overflow_occurred = true; + } + } + + OP_REQUIRES( + context, !overflow_occurred, + errors::Internal("Encountered overflow from large input shape.")); + + TensorShape tensor_input_shape(input_shape_vec); gtl::InlinedVector std_order(rank); std::iota(std_order.begin(), std_order.end(), 0); SparseTensor input_st; @@ -262,8 +275,7 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp { tensor_input_shape, std_order, &input_st)); - auto input_shape_t = input_shape->vec(); - const int64 N = input_shape_t(0); + const int64 N = input_shape_vec(0); Tensor sparse_handles(DT_INT64, TensorShape({N})); auto sparse_handles_t = sparse_handles.vec(); @@ -274,7 +286,7 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp { // minibatch entries.
TensorShape output_shape; OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( - input_shape_t.data() + 1, + input_shape_vec.data() + 1, input_shape->NumElements() - 1, &output_shape)); // Get groups by minibatch dimension diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc index fc070610877d5f..3497f95f13d306 100644 --- a/tensorflow/core/kernels/split_v_op.cc +++ b/tensorflow/core/kernels/split_v_op.cc @@ -138,8 +138,17 @@ class SplitVOpBase : public OpKernel { (*split_sizes_vec)[neg_one_dim] = input_size_split_dim - determined_size; } - // Special case 2: split along the 1st dimension. We can share the - // underlying buffer. + for (int i = 0; i < split_sizes_vec->size(); ++i) { + const Tlen& split_size = (*split_sizes_vec)[i]; + OP_REQUIRES(context, split_size >= Tlen(0), + errors::InvalidArgument("Split size at index ", i, + " must be >= 0. Got: ", split_size)); + } + + // Special case 2: split along the 1st dimension. The requirements are that + // either we are splitting the outer dimension of two or more such that + // every outer subpart is aligned or that the split sizes mean that they are + // always aligned. In these cases, we can share the underlying buffer. // // Apply this optimization conservatively: if input is aligned, // the resulting tensors must be aligned. It's conservative diff --git a/tensorflow/core/kernels/string_ngrams_op.cc b/tensorflow/core/kernels/string_ngrams_op.cc index 8aed2b3831a2f4..97af9abc4454ac 100644 --- a/tensorflow/core/kernels/string_ngrams_op.cc +++ b/tensorflow/core/kernels/string_ngrams_op.cc @@ -53,6 +53,12 @@ class StringNGramsOp : public tensorflow::OpKernel { } void Compute(tensorflow::OpKernelContext* context) override { + for (int ngram_width : ngram_widths_) { + OP_REQUIRES( + context, ngram_width > 0, + errors::InvalidArgument("ngram_widths must contain positive values")); + } + const tensorflow::Tensor* data; OP_REQUIRES_OK(context, context->input("data", &data)); const auto& input_data = data->flat().data(); @@ -61,16 +67,28 @@ class StringNGramsOp : public tensorflow::OpKernel { OP_REQUIRES_OK(context, context->input("data_splits", &splits)); const auto& splits_vec = splits->flat(); - // Validate that the splits are valid indices into data + // Validate that the splits are valid indices into data, only if there are + // splits specified. const int input_data_size = data->flat().size(); const int splits_vec_size = splits_vec.size(); - for (int i = 0; i < splits_vec_size; ++i) { - bool valid_splits = splits_vec(i) >= 0; - valid_splits = valid_splits && (splits_vec(i) <= input_data_size); - OP_REQUIRES( - context, valid_splits, - errors::InvalidArgument("Invalid split value ", splits_vec(i), - ", must be in [0,", input_data_size, "]")); + if (splits_vec_size > 0) { + int prev_split = splits_vec(0); + OP_REQUIRES(context, prev_split == 0, + errors::InvalidArgument("First split value must be 0, got ", + prev_split)); + for (int i = 1; i < splits_vec_size; ++i) { + bool valid_splits = splits_vec(i) >= prev_split; + valid_splits = valid_splits && (splits_vec(i) <= input_data_size); + OP_REQUIRES(context, valid_splits, + errors::InvalidArgument( + "Invalid split value ", splits_vec(i), ", must be in [", + prev_split, ", ", input_data_size, "]")); + prev_split = splits_vec(i); + } + OP_REQUIRES(context, prev_split == input_data_size, + errors::InvalidArgument( + "Last split value must be data size. 
Expected ", + input_data_size, ", got ", prev_split)); } int num_batch_items = splits_vec.size() - 1; @@ -174,13 +192,31 @@ class StringNGramsOp : public tensorflow::OpKernel { ngram->append(left_pad_); ngram->append(separator_); } + // Only output first num_tokens - 1 pairs of data and separator for (int n = 0; n < num_tokens - 1; ++n) { ngram->append(data[data_start_index + n]); ngram->append(separator_); } - ngram->append(data[data_start_index + num_tokens - 1]); - for (int n = 0; n < right_padding; ++n) { - ngram->append(separator_); + // Handle case when there are no tokens or no right padding as these can + // result in consecutive separators. + if (num_tokens > 0) { + // If we have tokens, then output last and then pair each separator with + // the right padding that follows, to ensure ngram ends either with the + // token or with the right pad. + ngram->append(data[data_start_index + num_tokens - 1]); + for (int n = 0; n < right_padding; ++n) { + ngram->append(separator_); + ngram->append(right_pad_); + } + } else { + // If we don't have tokens, then the last item inserted into the ngram + // has been the separator from the left padding loop above. Hence, + // output right pad and separator and make sure to finish with a + // padding, not a separator. + for (int n = 0; n < right_padding - 1; ++n) { + ngram->append(right_pad_); + ngram->append(separator_); + } ngram->append(right_pad_); } diff --git a/tensorflow/core/kernels/string_ngrams_op_test.cc b/tensorflow/core/kernels/string_ngrams_op_test.cc index b89de9ad16dab8..0d52283bd8fb9d 100644 --- a/tensorflow/core/kernels/string_ngrams_op_test.cc +++ b/tensorflow/core/kernels/string_ngrams_op_test.cc @@ -542,6 +542,40 @@ TEST_F(NgramKernelTest, TestEmptyInput) { assert_int64_equal(expected_splits, *GetOutput(1)); } +TEST_F(NgramKernelTest, TestNoTokens) { + MakeOp("|", {3}, "L", "R", -1, false); + // Batch items are: + // 0: + // 1: "a" + AddInputFromArray(TensorShape({1}), {"a"}); + AddInputFromArray(TensorShape({3}), {0, 0, 1}); + TF_ASSERT_OK(RunOpKernel()); + + std::vector expected_values( + {"L|L|R", "L|R|R", // no input in first split + "L|L|a", "L|a|R", "a|R|R"}); // second split + std::vector expected_splits({0, 2, 5}); + + assert_string_equal(expected_values, *GetOutput(0)); + assert_int64_equal(expected_splits, *GetOutput(1)); +} + +TEST_F(NgramKernelTest, TestNoTokensNoPad) { + MakeOp("|", {3}, "", "", 0, false); + // Batch items are: + // 0: + // 1: "a" + AddInputFromArray(TensorShape({1}), {"a"}); + AddInputFromArray(TensorShape({3}), {0, 0, 1}); + TF_ASSERT_OK(RunOpKernel()); + + std::vector expected_values({}); + std::vector expected_splits({0, 0, 0}); + + assert_string_equal(expected_values, *GetOutput(0)); + assert_int64_equal(expected_splits, *GetOutput(1)); +} + TEST_F(NgramKernelTest, ShapeFn) { ShapeInferenceTestOp op("StringNGrams"); INFER_OK(op, "?;?", "[?];[?]"); diff --git a/tensorflow/core/kernels/substr_op.cc b/tensorflow/core/kernels/substr_op.cc index e382381e122324..ab83efda2a2e17 100644 --- a/tensorflow/core/kernels/substr_op.cc +++ b/tensorflow/core/kernels/substr_op.cc @@ -51,6 +51,11 @@ class SubstrOp : public OpKernel { const Tensor& len_tensor = context->input(2); const TensorShape& input_shape = input_tensor.shape(); const TensorShape& pos_shape = pos_tensor.shape(); + const TensorShape& len_shape = len_tensor.shape(); + OP_REQUIRES(context, (pos_shape == len_shape), + errors::InvalidArgument( + "pos and len should have the same shape, got: ", + pos_shape.DebugString(), " vs. 
", len_shape.DebugString())); bool is_scalar = TensorShapeUtils::IsScalar(pos_shape); diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc index 7f888da69d6c7f..6764deee21d286 100644 --- a/tensorflow/core/kernels/summary_kernels.cc +++ b/tensorflow/core/kernels/summary_kernels.cc @@ -38,12 +38,20 @@ class CreateSummaryFileWriterOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor* tmp; OP_REQUIRES_OK(ctx, ctx->input("logdir", &tmp)); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(tmp->shape()), + errors::InvalidArgument("logdir must be a scalar")); const string logdir = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("max_queue", &tmp)); - const int32 max_queue = tmp->scalar()(); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(tmp->shape()), + errors::InvalidArgument("max_queue must be a scalar")); + const int32_t max_queue = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("flush_millis", &tmp)); - const int32 flush_millis = tmp->scalar()(); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(tmp->shape()), + errors::InvalidArgument("flush_millis must be a scalar")); + const int32_t flush_millis = tmp->scalar()(); OP_REQUIRES_OK(ctx, ctx->input("filename_suffix", &tmp)); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(tmp->shape()), + errors::InvalidArgument("filename_suffix must be a scalar")); const string filename_suffix = tmp->scalar()(); core::RefCountPtr s; diff --git a/tensorflow/core/kernels/tile_ops.cc b/tensorflow/core/kernels/tile_ops.cc index c24c7f1b0bc48b..2aec5252b6ea0e 100644 --- a/tensorflow/core/kernels/tile_ops.cc +++ b/tensorflow/core/kernels/tile_ops.cc @@ -188,7 +188,8 @@ class TileOp : public OpKernel { context, multiples_array[i] >= 0, errors::InvalidArgument("Expected multiples[", i, "] >= 0, but got ", multiples_array[i])); - output_shape.AddDim(input.dim_size(i) * multiples_array[i]); + OP_REQUIRES_OK(context, output_shape.AddDimWithStatus( + input.dim_size(i) * multiples_array[i])); } if (output_shape == input.shape()) { context->set_output(0, input); diff --git a/tensorflow/core/kernels/transpose_functor.h b/tensorflow/core/kernels/transpose_functor.h index e6aaca8ff5d26c..cff96207fd3da9 100644 --- a/tensorflow/core/kernels/transpose_functor.h +++ b/tensorflow/core/kernels/transpose_functor.h @@ -19,6 +19,7 @@ limitations under the License. 
#include #include #include + #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/platform/logging.h" @@ -166,7 +167,6 @@ template Status DoTransposeImpl(const Device& d, const Tensor& in, const gtl::ArraySlice perm, bool conjugate, Tensor* out) { - CHECK_GE(in.dims(), 2); CHECK_EQ(in.dims(), out->dims()); CHECK_EQ(in.dims(), perm.size()); CHECK_EQ(in.dtype(), out->dtype()); @@ -247,7 +247,6 @@ inline Status DoMatrixTransposeImpl(const Device& device, const Tensor& in, return DoTransposeImpl(device, in, perm, conjugate, out); } - } // namespace internal } // namespace tensorflow diff --git a/tensorflow/core/kernels/unicode_ops.cc b/tensorflow/core/kernels/unicode_ops.cc index d3a7ad7b2866f7..ab09dbe1d54293 100644 --- a/tensorflow/core/kernels/unicode_ops.cc +++ b/tensorflow/core/kernels/unicode_ops.cc @@ -533,6 +533,21 @@ class UnicodeEncodeOp : public OpKernel { const Tensor& input_splits = context->input(1); const auto input_splits_flat = input_splits.flat(); + OP_REQUIRES( + context, input_splits.NumElements() > 0, + errors::InvalidArgument("Input_splits should contain elements, but " + "given input_values has 0 elements")); + // Operation will treat first argument in input_splits as if it were zero + // regardless of its actual value since splits should begin with zero and + // end with the length of the input values vector. + OP_REQUIRES( + context, input_splits_flat(0) == 0, + errors::InvalidArgument("First value in input_splits must be zero.")); + OP_REQUIRES(context, + input_splits_flat(input_splits_flat.size() - 1) == + input_tensor_flat.size(), + errors::InvalidArgument("Last value in input_splits must be " + "equal to length of input_tensor.")); // Since we limit to a 2-D input (flat_values of rank 1 and a single splits // tensor), our output dimension will be 1 with it's size equal to the // number of splits (outer dimension or ragged tensor). 
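The UnicodeEncode validation beginning above (and continuing in the next hunk) enforces the same row-splits contract that the string_ngrams hunk added earlier, the contract every ragged tensor relies on: the first split is 0, splits never decrease, and the last split equals the number of values. A compact, runnable restatement in plain C++ (not the kernel code):

```c++
#include <cstdio>
#include <vector>

// True iff `splits` is a well-formed row-splits vector for `data_size`
// values: starts at 0, is non-decreasing, stays within bounds, and ends
// exactly at data_size.
bool ValidRowSplits(const std::vector<int>& splits, int data_size) {
  if (splits.empty() || splits.front() != 0) return false;
  for (size_t i = 1; i < splits.size(); ++i) {
    if (splits[i] < splits[i - 1] || splits[i] > data_size) return false;
  }
  return splits.back() == data_size;
}

int main() {
  std::printf("%d\n", ValidRowSplits({0, 2, 5}, 5));  // 1: well formed
  std::printf("%d\n", ValidRowSplits({0, 6, 5}, 5));  // 0: decreasing split
  std::printf("%d\n", ValidRowSplits({1, 2, 5}, 5));  // 0: first split != 0
  return 0;
}
```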
@@ -548,6 +563,14 @@ class UnicodeEncodeOp : public OpKernel { for (int i = 1; i < input_splits_flat.size(); ++i) { icu::UnicodeString unicode_string; icu::UnicodeStringAppendable appendable_unicode_string(unicode_string); + OP_REQUIRES( + context, input_splits_flat(i - 1) <= input_splits_flat(i), + errors::InvalidArgument( + "Values in input_splits must be equal or in ascending order.")); + OP_REQUIRES( + context, input_splits_flat(i) <= input_tensor_flat.size(), + errors::InvalidArgument("Values in input_splits must be less than or " + "equal to input_tensor length.")); for (; idx < input_splits_flat(i); ++idx) { int32 code_point = input_tensor_flat(idx); // Check for invalid code point diff --git a/tensorflow/core/kernels/unravel_index_op.cc b/tensorflow/core/kernels/unravel_index_op.cc index 11d9dac70f7046..d5adef3bac170d 100644 --- a/tensorflow/core/kernels/unravel_index_op.cc +++ b/tensorflow/core/kernels/unravel_index_op.cc @@ -53,6 +53,14 @@ class UnravelIndexOp : public OpKernel { dims_tensor.shape().DebugString(), "\"")); auto dims = dims_tensor.vec(); + // Make sure dims does not contain a zero + for (int i = 0; i < dims.size(); i++) { + OP_REQUIRES( + ctx, dims(i) != 0, + errors::InvalidArgument("Input dims cannot contain a dim of zero, " + "but dims contains zero at index ", + i)); + } // Chek to make sure indices is not out of boundary Eigen::Tensor dims_prod_eigen = dims.prod(); diff --git a/tensorflow/core/kernels/unsorted_segment_join_op.cc b/tensorflow/core/kernels/unsorted_segment_join_op.cc index 7464e165e46c8b..9acfe7fb1e4952 100644 --- a/tensorflow/core/kernels/unsorted_segment_join_op.cc +++ b/tensorflow/core/kernels/unsorted_segment_join_op.cc @@ -90,6 +90,8 @@ class UnsortedSegmentJoinOp : public OpKernel { const int32 segment_dims = segment_id_shape.dims(); const Tensor& num_segments_tensor = context->input(2); + OP_REQUIRES(context, num_segments_tensor.NumElements() != 0, + errors::InvalidArgument("Number of segments cannot be empty.")); auto num_segments = num_segments_tensor.scalar()(); OP_REQUIRES(context, segment_dims != 0, diff --git a/tensorflow/core/kernels/xent_op.cc b/tensorflow/core/kernels/xent_op.cc index 0e826274f2ebd3..56c3fd9881bea8 100644 --- a/tensorflow/core/kernels/xent_op.cc +++ b/tensorflow/core/kernels/xent_op.cc @@ -44,7 +44,8 @@ class SoftmaxXentWithLogitsOp : public OpKernel { TensorShape shape_in = logits_in.shape(); BCast bcast(BCast::FromShape(logits_in.shape()), - BCast::FromShape(labels_in.shape())); + BCast::FromShape(labels_in.shape()), + /*fewer_dims_optimization=*/false); if (!logits_in.IsSameSize(labels_in)) { OP_REQUIRES(context, bcast.IsValid(), errors::InvalidArgument( @@ -76,20 +77,12 @@ class SoftmaxXentWithLogitsOp : public OpKernel { {0}, 1, shape_in, &back_out)); if (shape_in.dim_size(0) > 0) { functor::XentFunctor functor; - if (logits_in.IsSameSize(labels_in)) { - functor(context->eigen_device(), shape_in.AsEigenDSizes<2>(), - Eigen::array{1, 1}, - Eigen::array{1, 1}, logits_in.matrix(), - labels_in.matrix(), scratch.matrix(), loss_out->vec(), - back_out->matrix()); - } else { - functor(context->eigen_device(), shape_in.AsEigenDSizes<2>(), - BCast::ToIndexArray<2>(bcast.x_bcast()), - BCast::ToIndexArray<2>(bcast.y_bcast()), - logits_in.template shaped(bcast.x_reshape()), - labels_in.template shaped(bcast.y_reshape()), - scratch.matrix(), loss_out->vec(), back_out->matrix()); - } + functor(context->eigen_device(), shape_in.AsEigenDSizes<2>(), + BCast::ToIndexArray<2>(bcast.x_bcast()), + 
BCast::ToIndexArray<2>(bcast.y_bcast()), + logits_in.template shaped(bcast.x_reshape()), + labels_in.template shaped(bcast.y_reshape()), + scratch.matrix(), loss_out->vec(), back_out->matrix()); } } }; diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 2018f793741ccf..277a00354f46ca 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -168,7 +168,7 @@ Status TransposeShapeFn(InferenceContext* c) { for (int32 i = 0; i < rank; ++i) { int64 in_idx = data[i]; - if (in_idx >= rank) { + if (in_idx >= rank || in_idx <= -rank) { return errors::InvalidArgument("perm dim ", in_idx, " is out of range of input rank ", rank); } @@ -680,6 +680,12 @@ REGISTER_OP("SplitV") if (data[i] == -1 && c->ValueKnown(split_dim_size)) { size = split_dim_size - total_size; } + // If we have a negative known size (either explicit, or computed + // via -1), then the split sizes are invalid. + if (size < -1 || (size == -1 && c->ValueKnown(split_dim_size))) { + return errors::InvalidArgument("Split size at index ", i, + " must be >= 0. Got: ", size); + } TF_RETURN_IF_ERROR( c->ReplaceDim(input, split_dim, c->MakeDim(size), &output_shape)); c->set_output(i, output_shape); @@ -2797,7 +2803,10 @@ REGISTER_OP("QuantizeAndDequantizeV2") ShapeHandle minmax; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), minmax_rank, &minmax)); TF_RETURN_IF_ERROR(c->Merge(c->input(2), minmax, &minmax)); - if (axis != -1) { + if (axis < -1) { + return errors::InvalidArgument("axis should be at least -1, got ", + axis); + } else if (axis != -1) { ShapeHandle input; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; @@ -2829,7 +2838,10 @@ REGISTER_OP("QuantizeAndDequantizeV4") ShapeHandle minmax; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), minmax_rank, &minmax)); TF_RETURN_IF_ERROR(c->Merge(c->input(2), minmax, &minmax)); - if (axis != -1) { + if (axis < -1) { + return errors::InvalidArgument("axis should be at least -1, got ", + axis); + } else if (axis != -1) { ShapeHandle input; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; @@ -2857,7 +2869,10 @@ REGISTER_OP("QuantizeAndDequantizeV4Grad") ShapeHandle minmax; TF_RETURN_IF_ERROR(c->WithRank(c->input(2), minmax_rank, &minmax)); TF_RETURN_IF_ERROR(c->Merge(c->input(3), minmax, &minmax)); - if (axis != -1) { + if (axis < -1) { + return errors::InvalidArgument("axis should be at least -1, got ", + axis); + } else if (axis != -1) { ShapeHandle input; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; @@ -2890,7 +2905,10 @@ REGISTER_OP("QuantizeAndDequantizeV3") ShapeHandle minmax; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), minmax_rank, &minmax)); TF_RETURN_IF_ERROR(c->Merge(c->input(2), minmax, &minmax)); - if (axis != -1) { + if (axis < -1) { + return errors::InvalidArgument("axis should be at least -1, got ", + axis); + } else if (axis != -1) { ShapeHandle input; TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; @@ -2957,6 +2975,10 @@ REGISTER_OP("Dequantize") if (!s.ok() && s.code() != error::NOT_FOUND) { return s; } + if (axis < -1) { + return errors::InvalidArgument("axis should be at least -1, got ", + axis); + } const int minmax_rank = (axis == -1) ? 
0 : 1; TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); ShapeHandle minmax; diff --git a/tensorflow/core/ops/array_ops_test.cc b/tensorflow/core/ops/array_ops_test.cc index 412c926d3863a6..2fe45474bfaa44 100644 --- a/tensorflow/core/ops/array_ops_test.cc +++ b/tensorflow/core/ops/array_ops_test.cc @@ -1363,6 +1363,8 @@ TEST(ArrayOpsTest, QuantizeAndDequantizeV2_ShapeFn) { INFER_ERROR("Shapes must be equal rank, but are 1 and 0", op, "[1,2,?,4,5];[];[1]"); INFER_ERROR("Shape must be rank 0 but is rank 1", op, "[1,2,?,4,5];[1];[1]"); + (*op.node_def.mutable_attr())["axis"].set_i(-2); + INFER_ERROR("axis should be at least -1, got -2", op, "?;?;?"); } TEST(ArrayOpsTest, SpaceToBatch_ShapeFn) { diff --git a/tensorflow/core/ops/count_ops.cc b/tensorflow/core/ops/count_ops.cc index 8de0a2ef95459b..95e8026fd8b663 100644 --- a/tensorflow/core/ops/count_ops.cc +++ b/tensorflow/core/ops/count_ops.cc @@ -41,6 +41,8 @@ Status DenseCountSparseOutputShapeFn(InferenceContext *c) { } Status SparseCountSparseOutputShapeFn(InferenceContext *c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &unused)); auto rank = c->Dim(c->input(0), 1); auto nvals = c->UnknownDim(); c->set_output(0, c->Matrix(nvals, rank)); // out.indices diff --git a/tensorflow/core/ops/cudnn_rnn_ops.cc b/tensorflow/core/ops/cudnn_rnn_ops.cc index 1dd7659e137fe3..ff6d2852a66271 100644 --- a/tensorflow/core/ops/cudnn_rnn_ops.cc +++ b/tensorflow/core/ops/cudnn_rnn_ops.cc @@ -81,11 +81,17 @@ REGISTER_OP("CudnnRNN") .Attr("seed2: int = 0") .Attr("is_training: bool = true") .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; auto input_shape = c->input(0); auto input_h_shape = c->input(1); + TF_RETURN_IF_ERROR(c->WithRank(input_shape, 3, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(input_h_shape, 3, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &unused)); + auto seq_length = c->Dim(input_shape, 0); auto batch_size = c->Dim(input_shape, 1); auto num_units = c->Dim(input_h_shape, 2); + string direction; TF_RETURN_IF_ERROR(c->GetAttr("direction", &direction)); string rnn_mode; @@ -124,8 +130,13 @@ REGISTER_OP("CudnnRNNV2") .Attr("seed2: int = 0") .Attr("is_training: bool = true") .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; auto input_shape = c->input(0); auto input_h_shape = c->input(1); + TF_RETURN_IF_ERROR(c->WithRank(input_shape, 3, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(input_h_shape, 3, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &unused)); + auto seq_length = c->Dim(input_shape, 0); auto batch_size = c->Dim(input_shape, 1); auto num_units = c->Dim(input_h_shape, 2); @@ -171,16 +182,26 @@ REGISTER_OP("CudnnRNNV3") .Attr("is_training: bool = true") .Attr("time_major: bool = true") .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; auto input_shape = c->input(0); auto input_h_shape = c->input(1); auto input_c_shape = c->input(2); + TF_RETURN_IF_ERROR(c->WithRank(input_shape, 3, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(input_h_shape, 3, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 1, &unused)); + auto max_seq_length = c->Dim(input_shape, 0); auto batch_size = c->Dim(input_shape, 1); auto num_units = c->Dim(input_h_shape, 2); + string direction; TF_RETURN_IF_ERROR(c->GetAttr("direction", &direction)); string rnn_mode; TF_RETURN_IF_ERROR(c->GetAttr("rnn_mode", &rnn_mode)); + if (rnn_mode == "lstm") { + TF_RETURN_IF_ERROR(c->WithRank(input_c_shape, 3, &unused)); + } int 
dir_count = (direction == "bidirectional") ? 2 : 1; DimensionHandle output_size; TF_RETURN_IF_ERROR(c->Multiply(num_units, dir_count, &output_size)); diff --git a/tensorflow/core/ops/cudnn_rnn_ops_test.cc b/tensorflow/core/ops/cudnn_rnn_ops_test.cc index 8e8c8193a14a48..91043efa425a0b 100644 --- a/tensorflow/core/ops/cudnn_rnn_ops_test.cc +++ b/tensorflow/core/ops/cudnn_rnn_ops_test.cc @@ -68,6 +68,11 @@ TEST(CudnnRNNOpsTest, ForwardLstm_ShapeFn) { .Attr("direction", "unidirectional") .Finalize(&op.node_def)); INFER_OK(op, input_shapes_desc, output_shapes_desc); + INFER_ERROR("Shape must be rank 3 ", op, "[];[?,?,?];[?,?,?];[?]"); + INFER_ERROR("Shape must be rank 3 ", op, "[?,?,?];[];[?,?,?];[?]"); + // Disabled because the kernel does not check shape of input_c. + // INFER_ERROR("Shape must be rank 3 ", op, "[?,?,?];[?,?,?];[?];[?]"); + INFER_ERROR("Shape must be rank 1 ", op, "[?,?,?];[?,?,?];[?,?,?];[]"); } TEST(CudnnRNNOpsTest, ForwardV2Lstm_ShapeFn) { @@ -100,6 +105,11 @@ TEST(CudnnRNNOpsTest, ForwardV2Lstm_ShapeFn) { .Attr("direction", "unidirectional") .Finalize(&op.node_def)); INFER_OK(op, input_shapes_desc, output_shapes_desc); + INFER_ERROR("Shape must be rank 3 ", op, "[];[?,?,?];[?,?,?];[?]"); + INFER_ERROR("Shape must be rank 3 ", op, "[?,?,?];[];[?,?,?];[?]"); + // Disabled because the kernel does not check shape of input_c. + // INFER_ERROR("Shape must be rank 3 ", op, "[?,?,?];[?,?,?];[?];[?]"); + INFER_ERROR("Shape must be rank 1 ", op, "[?,?,?];[?,?,?];[?,?,?];[]"); } TEST(CudnnRNNOpsTest, ForwardV3Lstm_ShapeFn) { @@ -137,6 +147,52 @@ TEST(CudnnRNNOpsTest, ForwardV3Lstm_ShapeFn) { .Attr("direction", "unidirectional") .Finalize(&op.node_def)); INFER_OK(op, input_shapes_desc, output_shapes_desc); + INFER_ERROR("Shape must be rank 3 ", op, "[];[?,?,?];[?,?,?];[?];[?]"); + INFER_ERROR("Shape must be rank 3 ", op, "[?,?,?];[];[?,?,?];[?];[?]"); + INFER_ERROR("Shape must be rank 3 ", op, "[?,?,?];[?,?,?];[];[?];[?]"); + INFER_ERROR("Shape must be rank 1 ", op, "[?,?,?];[?,?,?];[?,?,?];[];[?]"); + INFER_ERROR("Shape must be rank 1 ", op, "[?,?,?];[?,?,?];[?,?,?];[?];[]"); +} + +TEST(CudnnRNNOpsTest, ForwardV3Gru) { + int max_seq_length = 2; + int batch_size = 3; + int num_units = 4; + int num_layers = 5; + int dir_count = 1; + std::vector input_shape = {max_seq_length, batch_size, num_units}; + std::vector input_h_shape = {num_layers * dir_count, batch_size, + num_units}; + std::vector input_c_shape = {num_layers * dir_count, batch_size, + num_units}; + std::vector output_shape = {max_seq_length, batch_size, + num_units * dir_count}; + std::vector seq_lengths_shape = {batch_size}; + auto shape_to_str = [](const std::vector& v) { + return strings::StrCat("[", absl::StrJoin(v, ","), "]"); + }; + string input_shapes_desc = strings::StrCat( + shape_to_str(input_shape), ";", shape_to_str(input_h_shape), ";", + shape_to_str(input_c_shape), ";", "[?]", ";", + shape_to_str(seq_lengths_shape)); + string output_shapes_desc = "[d0_0,d0_1,d1_2];in1;[];?;?"; + + ShapeInferenceTestOp op("CudnnRNNV3"); + TF_ASSERT_OK(NodeDefBuilder("test", "CudnnRNNV3") + .Input({"input", 0, DT_FLOAT}) + .Input({"input_h", 0, DT_FLOAT}) + .Input({"input_c", 0, DT_FLOAT}) + .Input({"params", 0, DT_FLOAT}) + .Input({"sequence_lengths", 0, DT_INT32}) + .Attr("rnn_mode", "gru") + .Attr("input_mode", "auto_select") + .Attr("direction", "unidirectional") + .Finalize(&op.node_def)); + INFER_OK(op, input_shapes_desc, output_shapes_desc); + INFER_ERROR("Shape must be rank 3 ", op, "[];[?,?,?];[];[?];[?]"); + 
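A pattern worth naming across the CudnnRNN*, SplitV, QuantizeAndDequantize*, and SparseCountSparseOutput shape functions in this region: every rank and attribute that the shape function later indexes is validated first, returning InvalidArgument instead of reading out of bounds. A distilled sketch with plain-C++ stand-ins for the InferenceContext API (the real `WithRank` also tolerates unknown ranks, which this sketch omits):

```c++
#include <string>
#include <vector>

// Hypothetical status type standing in for tensorflow::Status.
struct ShapeStatus {
  bool ok;
  std::string msg;
};

// Reject shapes whose rank later indexing assumes.
ShapeStatus WithRank(const std::vector<long>& shape, int rank) {
  if (static_cast<int>(shape.size()) != rank)
    return {false, "Shape must be rank " + std::to_string(rank)};
  return {true, ""};
}

// The QuantizeAndDequantize* convention: -1 means "no axis"; anything
// smaller is invalid, anything else must index a real dimension.
ShapeStatus ValidateAxis(int axis, int rank) {
  if (axis < -1) return {false, "axis should be at least -1"};
  if (axis != -1 && axis >= rank) return {false, "axis out of range"};
  return {true, ""};
}

// The SplitV rule: a known split size below -1, or a computed -1, is invalid.
ShapeStatus ValidateSplitSize(long size) {
  if (size < 0) return {false, "Split size must be >= 0"};
  return {true, ""};
}
```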
INFER_ERROR("Shape must be rank 3 ", op, "[?,?,?];[];[];[?];[?]"); + INFER_ERROR("Shape must be rank 1 ", op, "[?,?,?];[?,?,?];[];[];[?]"); + INFER_ERROR("Shape must be rank 1 ", op, "[?,?,?];[?,?,?];[];[?];[]"); } } // end namespace tensorflow diff --git a/tensorflow/core/ops/lookup_ops.cc b/tensorflow/core/ops/lookup_ops.cc index 8948df2cef361a..05aa229336d46f 100644 --- a/tensorflow/core/ops/lookup_ops.cc +++ b/tensorflow/core/ops/lookup_ops.cc @@ -169,10 +169,6 @@ REGISTER_OP("LookupTableFindV2") ShapeHandle handle; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &handle)); - // Default value must be scalar or vector. - ShapeHandle keys; - TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(2), 1, &keys)); - ShapeAndType value_shape_and_type; TF_RETURN_IF_ERROR(ValidateTableResourceHandle( c, diff --git a/tensorflow/core/ops/lookup_ops_test.cc b/tensorflow/core/ops/lookup_ops_test.cc index ac899d59993381..904099f1813a4a 100644 --- a/tensorflow/core/ops/lookup_ops_test.cc +++ b/tensorflow/core/ops/lookup_ops_test.cc @@ -25,7 +25,6 @@ namespace { TEST(LookupOpsTest, LookupTableFindV2_ShapeFn) { ShapeInferenceTestOp op("LookupTableFindV2"); INFER_ERROR("Shape must be rank 0 but is rank 1", op, "[?];?;?"); - INFER_ERROR("Shape must be at most rank 1 but is rank 2", op, "[];?;[1,1]"); TF_ASSERT_OK(NodeDefBuilder("test", "LookupTableFindV2") .Input({"table_handle", 0, DT_RESOURCE}) .Input({"keys", 0, DT_INT64}) diff --git a/tensorflow/core/ops/ragged_array_ops.cc b/tensorflow/core/ops/ragged_array_ops.cc index 4eefa6181c2214..f113c779195002 100644 --- a/tensorflow/core/ops/ragged_array_ops.cc +++ b/tensorflow/core/ops/ragged_array_ops.cc @@ -64,6 +64,7 @@ REGISTER_OP("RaggedCross") .SetShapeFn([](shape_inference::InferenceContext* c) { std::vector ragged_values_types; std::vector ragged_splits_types; + std::vector sparse_values_types; std::vector dense_types; TF_RETURN_IF_ERROR( @@ -71,15 +72,21 @@ REGISTER_OP("RaggedCross") TF_RETURN_IF_ERROR( c->GetAttr("ragged_splits_types", &ragged_splits_types)); TF_RETURN_IF_ERROR(c->GetAttr("dense_types", &dense_types)); + TF_RETURN_IF_ERROR( + c->GetAttr("sparse_values_types", &sparse_values_types)); int num_ragged = ragged_values_types.size(); if (num_ragged != ragged_splits_types.size()) { return errors::InvalidArgument( - "Parameters `values` and `row_splits` must be the same length"); + "ragged values and splits must have the same length."); } int num_sparse; TF_RETURN_IF_ERROR(c->GetAttr("Nsparse", &num_sparse)); + if (num_sparse != sparse_values_types.size()) { + return errors::InvalidArgument( + "sparse indices and values must have the same length"); + } ShapeHandle out_values = c->UnknownShapeOfRank(1); ShapeHandle out_splits = c->UnknownShapeOfRank(1); @@ -99,7 +106,14 @@ REGISTER_OP("RaggedCross") int dense_start = num_ragged * 2 + num_sparse * 3; for (int i = 0; i < dense_types.size(); ++i) { ShapeHandle dense_input = c->input(i + dense_start); - int64 batch_size = c->Value(c->Dim(dense_input, 0)); + int32 rank = c->Rank(dense_input); + if (rank == InferenceContext::kUnknownRank) { + continue; + } else if (rank != 2) { + return errors::InvalidArgument( + "tf.ragged.cross only supports inputs with rank=2"); + } + int64_t batch_size = c->Value(c->Dim(dense_input, 0)); if (batch_size != InferenceContext::kUnknownDim) { ShapeHandle row_splits = c->Vector(batch_size + 1); if (!c->Merge(out_splits, row_splits, &out_splits).ok()) { diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc index 906cef1f5ecafe..c79aeebd94bd33 
100644 --- a/tensorflow/core/ops/sparse_ops.cc +++ b/tensorflow/core/ops/sparse_ops.cc @@ -16,6 +16,8 @@ limitations under the License. #include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -158,6 +160,8 @@ REGISTER_OP("DeserializeSparse") .Attr("Tserialized: {string, variant} = DT_STRING") .SetShapeFn([](InferenceContext* c) { // serialized sparse is [?, ..., ?, 3] vector. + ShapeHandle unused_shape; + TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &unused_shape)); DimensionHandle unused; TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), -1), 3, &unused)); c->set_output(0, c->Matrix(InferenceContext::kUnknownDim, @@ -619,6 +623,8 @@ REGISTER_OP("SparseFillEmptyRows") DimensionHandle unused_dim; TF_RETURN_IF_ERROR(c->Merge(c->Dim(input_indices, 1), c->Dim(input_shape, 0), &unused_dim)); + if (c->Value(c->NumElements(input_shape)) == 0) + return errors::InvalidArgument("dense_shape must not be empty"); ShapeHandle output_indices = c->Matrix(InferenceContext::kUnknownDim, c->NumElements(input_shape)); ShapeHandle output_values = c->Vector(InferenceContext::kUnknownDim); diff --git a/tensorflow/core/ops/tpu_cross_replica_ops.cc b/tensorflow/core/ops/tpu_cross_replica_ops.cc index 1f10fe3136dd1c..404c3e59eff941 100644 --- a/tensorflow/core/ops/tpu_cross_replica_ops.cc +++ b/tensorflow/core/ops/tpu_cross_replica_ops.cc @@ -32,17 +32,35 @@ REGISTER_OP("AllToAll") .Attr("split_count: int") .SetShapeFn([](InferenceContext* c) { ShapeHandle input = c->input(0); + ShapeHandle group_assignment = c->input(1); + int64 rank; if (c->RankKnown(input)) { rank = c->Rank(input); } else { return errors::InvalidArgument("input's rank is unknown."); } + int concat_dimension; int split_dimension; int split_count; TF_RETURN_IF_ERROR(c->GetAttr("split_count", &split_count)); + if (split_count < 1) { + return errors::InvalidArgument("split_count ", split_count, + " must at least be one."); + } + if (c->RankKnown(group_assignment) && c->Rank(group_assignment) != 2) { + return errors::InvalidArgument("group_assignment must have rank 2."); + } + DimensionHandle num_replicas_per_group = c->Dim(group_assignment, 1); + if (c->ValueKnown(num_replicas_per_group) && + (c->Value(num_replicas_per_group) != split_count)) { + return errors::InvalidArgument( + "split_count ", split_count, + " must equal the size of the second dimension of group_assignment ", + c->Value(num_replicas_per_group)); + } TF_RETURN_IF_ERROR(c->GetAttr("concat_dimension", &concat_dimension)); @@ -66,6 +84,12 @@ REGISTER_OP("AllToAll") dims[i] = c->MakeDim(c->Value(dims[i]) * split_count); } if (i == split_dimension) { + if (c->ValueKnown(dims[i]) && + (c->Value(dims[i]) % split_count != 0)) { + return errors::InvalidArgument( + "input dimension ", c->Value(dims[i]), + " not divisible by split_count ", split_count); + } dims[i] = c->MakeDim(c->Value(dims[i]) / split_count); } } diff --git a/tensorflow/core/platform/ctstring_internal.h b/tensorflow/core/platform/ctstring_internal.h index 9524267176c24e..9a01d48fb1e91f 100644 --- a/tensorflow/core/platform/ctstring_internal.h +++ b/tensorflow/core/platform/ctstring_internal.h @@ -63,9 +63,9 @@ static inline uint32_t TF_swap32(uint32_t host_int) { #endif #if TF_TSTRING_LITTLE_ENDIAN -#define TF_le32toh(x) TF_swap32(x) -#else // TF_TSTRING_LITTLE_ENDIAN #define TF_le32toh(x) x +#else 
// TF_TSTRING_LITTLE_ENDIAN +#define TF_le32toh(x) TF_swap32(x) #endif // TF_TSTRING_LITTLE_ENDIAN static inline size_t TF_align16(size_t i) { return (i + 0xF) & ~0xF; } diff --git a/tensorflow/core/platform/ctstring_test.cc b/tensorflow/core/platform/ctstring_test.cc index 4d82bcd87c327b..8624cc4ee73b49 100644 --- a/tensorflow/core/platform/ctstring_test.cc +++ b/tensorflow/core/platform/ctstring_test.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include "tensorflow/core/platform/ctstring_internal.h" #include "tensorflow/core/platform/test.h" static const char kLongString[] = @@ -329,3 +330,29 @@ TEST(TF_CTStringTest, ResizeReserve) { TF_TString_Dealloc(&s70); } } + +TEST(TF_CTStringTest, OffsetType) { + { + TF_TString s71; + + TF_TString_Init(&s71); + size_t header_length = 24; + size_t size = 8; + TF_TString_ResizeUninitialized(&s71, header_length + size); + uint32_t save_size = s71.u.offset.size; + uint32_t save_offset = s71.u.offset.offset; + uint32_t save_count = s71.u.offset.count; + + s71.u.offset.size = TF_TString_ToInternalSizeT(size, TF_TSTR_OFFSET); + s71.u.offset.offset = header_length; + s71.u.offset.count = 0; + EXPECT_EQ(size, TF_TString_GetSize(&s71)); + EXPECT_EQ(TF_TSTR_OFFSET, TF_TString_GetType(&s71)); + + // restore state so string can be deallocated + s71.u.offset.size = save_size; + s71.u.offset.offset = save_offset; + s71.u.offset.count = save_count; + TF_TString_Dealloc(&s71); + } +} diff --git a/tensorflow/core/protobuf/tpu/tpu_embedding_configuration.proto b/tensorflow/core/protobuf/tpu/tpu_embedding_configuration.proto index 038c7a1b8aa5c3..7e321158091cee 100644 --- a/tensorflow/core/protobuf/tpu/tpu_embedding_configuration.proto +++ b/tensorflow/core/protobuf/tpu/tpu_embedding_configuration.proto @@ -87,6 +87,23 @@ message TPUEmbeddingConfiguration { // problem. bool pipeline_execution_with_tensor_core = 7; + // Directory where embedding lookup statistics are stored. These statistics + // summarize information about the inputs to the embedding lookup + // operation, in particular, the average number of embedding IDs per example + // and how well the embedding IDs are load balanced across the system. The + // lookup statistics are used during TPU initialization for embedding table + // partitioning. Collection of lookup statistics is done at runtime by + // profiling the embedding inputs: only 3% of input samples are profiled to + // minimize host CPU overhead. Once a suitable number of samples are + // profiled, the lookup statistics are saved to table-specific files in the + // profile data directory generally at the end of a TPU training loop. The + // filename corresponding to each table is obtained by hashing table specific + // parameters (e.g., table name and number of features) and global + // configuration parameters (e.g., sharding strategy and TPU worker task + // count). The same profile data directory can be shared amongst several + // models to reuse embedding lookup statistics. + string profile_data_directory = 9; + // Extended output layout information; deprecated and now ignored. TPUEmbeddingOutputLayout output_layout = 8 [deprecated = true]; } diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 6a7e9f6f548852..13b56ca8db2fae 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -22,7 +22,7 @@ limitations under the License. 
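The ctstring_internal.h hunk above is a pure logic inversion: the original code swapped bytes on little-endian hosts and passed them through on big-endian ones, exactly backwards for a le32-to-host conversion. A runnable restatement of the intended behavior (`Swap32` and `Le32ToHost` are illustrative, not the TF macros):

```c++
#include <cstdint>
#include <cstdio>

// Reverse the four bytes of a 32-bit value.
static uint32_t Swap32(uint32_t x) {
  return ((x & 0xFF) << 24) | ((x & 0xFF00) << 8) |
         ((x >> 8) & 0xFF00) | (x >> 24);
}

// Detect host byte order at runtime (the TF header does this at
// compile time via preprocessor checks).
static bool HostIsLittleEndian() {
  const uint32_t probe = 1;
  return *reinterpret_cast<const unsigned char*>(&probe) == 1;
}

// Little-endian-to-host: identity on little-endian hosts, byte swap
// only on big-endian hosts. This is the orientation the fix restores.
static uint32_t Le32ToHost(uint32_t x) {
  return HostIsLittleEndian() ? x : Swap32(x);
}

int main() {
  std::printf("0x%08x\n", Le32ToHost(0x01020304u));
  return 0;
}
```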
// tensorflow/tools/pip_package/setup.py #define TF_MAJOR_VERSION 2 #define TF_MINOR_VERSION 4 -#define TF_PATCH_VERSION 0 +#define TF_PATCH_VERSION 4 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") diff --git a/tensorflow/core/util/image_resizer_state.h b/tensorflow/core/util/image_resizer_state.h index 84459c9447edc2..e554b208e14fff 100644 --- a/tensorflow/core/util/image_resizer_state.h +++ b/tensorflow/core/util/image_resizer_state.h @@ -135,11 +135,16 @@ struct ImageResizerState { void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input) { ValidateAndCalculateOutputSize(context, input); if (!context->status().ok()) return; - OP_REQUIRES_OK(context, context->allocate_output( - 0, - TensorShape({input.dim_size(0), out_height, - out_width, input.dim_size(3)}), - &output)); + + TensorShape shape; + // Guard against shape overflow + OP_REQUIRES_OK(context, shape.AddDimWithStatus(batch_size)); + + OP_REQUIRES_OK(context, shape.AddDimWithStatus(out_height)); + OP_REQUIRES_OK(context, shape.AddDimWithStatus(out_width)); + OP_REQUIRES_OK(context, shape.AddDimWithStatus(channels)); + + OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output)); } int64 batch_size; diff --git a/tensorflow/core/util/saved_tensor_slice_util.h b/tensorflow/core/util/saved_tensor_slice_util.h index 1f9768f5163d25..27916095bfb1fd 100644 --- a/tensorflow/core/util/saved_tensor_slice_util.h +++ b/tensorflow/core/util/saved_tensor_slice_util.h @@ -59,6 +59,9 @@ Status ParseShapeAndSlice(const string& shape_and_slice, TensorShape* shape, template struct SaveTypeTraits; +template +int TensorProtoDataSize(const TensorProto& t); + template const typename SaveTypeTraits::SavedType* TensorProtoData( const TensorProto& t); @@ -95,6 +98,10 @@ void Fill(T* data, size_t n, TensorProto* t); #define TENSOR_PROTO_EXTRACT_TYPE(TYPE, FIELD, FTYPE) \ TENSOR_PROTO_EXTRACT_TYPE_HELPER(TYPE, FIELD, FTYPE, FTYPE) \ template <> \ + inline int TensorProtoDataSize(const TensorProto& t) { \ + return t.FIELD##_val_size(); \ + } \ + template <> \ inline void Fill(const TYPE* data, size_t n, TensorProto* t) { \ typename protobuf::RepeatedField copy(data, data + n); \ t->mutable_##FIELD##_val()->Swap(©); \ @@ -104,6 +111,10 @@ void Fill(T* data, size_t n, TensorProto* t); #define TENSOR_PROTO_EXTRACT_TYPE_COMPLEX(TYPE, FIELD, FTYPE) \ TENSOR_PROTO_EXTRACT_TYPE_HELPER(TYPE, FIELD, FTYPE, TYPE) \ template <> \ + inline int TensorProtoDataSize(const TensorProto& t) { \ + return t.FIELD##_val_size() / 2; \ + } \ + template <> \ inline void Fill(const TYPE* data, size_t n, TensorProto* t) { \ const FTYPE* sub = reinterpret_cast(data); \ typename protobuf::RepeatedField copy(sub, sub + 2 * n); \ @@ -136,6 +147,11 @@ TENSOR_PROTO_EXTRACT_TYPE(quint16, int, int32); template <> struct SaveTypeTraits : SaveTypeTraits {}; +template <> +inline int TensorProtoDataSize(const TensorProto& t) { + return t.int_val_size(); +} + template <> inline const int32* TensorProtoData(const TensorProto& t) { static_assert(SaveTypeTraits::supported, @@ -158,6 +174,11 @@ struct SaveTypeTraits { typedef protobuf::RepeatedField RepeatedField; }; +template <> +inline int TensorProtoDataSize(const TensorProto& t) { + return t.half_val_size(); +} + template <> inline const int* TensorProtoData(const TensorProto& t) { return t.half_val().data(); @@ -187,6 +208,11 @@ struct SaveTypeTraits { typedef protobuf::RepeatedPtrField RepeatedField; }; +template <> +inline int TensorProtoDataSize(const 
TensorProto& t) { + return t.string_val_size(); +} + template <> inline const string* const* TensorProtoData(const TensorProto& t) { static_assert(SaveTypeTraits::supported, diff --git a/tensorflow/core/util/sparse/sparse_tensor.h b/tensorflow/core/util/sparse/sparse_tensor.h index 062226d7699bc8..341290dbbc6982 100644 --- a/tensorflow/core/util/sparse/sparse_tensor.h +++ b/tensorflow/core/util/sparse/sparse_tensor.h @@ -527,6 +527,10 @@ inline Status SparseTensor::Split(const SparseTensor& input_tensor, for (int i = 0; i < input_tensor.indices().dim_size(0); ++i) { const int dim = input_tensor.indices().matrix()(i, split_dim); int slice_index = GetSliceIndex(dim, split_size, residual); + if (slice_index >= num_values.size()) { + return errors::InvalidArgument("Slice index ", slice_index, + " is larger than num_split."); + } num_values[slice_index]++; } diff --git a/tensorflow/core/util/tensor_slice_reader.cc b/tensorflow/core/util/tensor_slice_reader.cc index 58c4c22ce7a9e2..fd98d1b4041a30 100644 --- a/tensorflow/core/util/tensor_slice_reader.cc +++ b/tensorflow/core/util/tensor_slice_reader.cc @@ -168,9 +168,13 @@ void TensorSliceReader::LoadShard(int shard) const { "checkpoint"); if (!status_.ok()) return; for (const SavedSliceMeta& ssm : sts.meta().tensor()) { - TensorShape ssm_shape(ssm.shape()); + TensorShape ssm_shape; + status_ = TensorShape::BuildTensorShapeBase(ssm.shape(), &ssm_shape); + if (!status_.ok()) return; for (const TensorSliceProto& tsp : ssm.slice()) { - TensorSlice ss_slice(tsp); + TensorSlice ss_slice; + status_ = TensorSlice::BuildTensorSlice(tsp, &ss_slice); + if (!status_.ok()) return; status_ = RegisterTensorSlice(ssm.name(), ssm_shape, ssm.type(), fname, ss_slice, &tensors_); if (!status_.ok()) return; @@ -248,7 +252,9 @@ Status TensorSliceReader::GetTensor( slice = tss->Slices().begin()->second.slice; } - std::unique_ptr t(new tensorflow::Tensor(type, shape)); + std::unique_ptr t(new tensorflow::Tensor); + Status s = tensorflow::Tensor::BuildTensor(type, shape, t.get()); + if (!s.ok()) return s; bool success = false; #define READER_COPY(dt) \ diff --git a/tensorflow/core/util/tensor_slice_reader.h b/tensorflow/core/util/tensor_slice_reader.h index 0fb2e11bf8dd08..bc0a91523fe36c 100644 --- a/tensorflow/core/util/tensor_slice_reader.h +++ b/tensorflow/core/util/tensor_slice_reader.h @@ -181,6 +181,22 @@ bool TensorSliceReader::CopySliceData(const string& name, << slice_s.DebugString() << ": computed key = " << key; return false; } + // Ensure the TensorSlice contains the expected amount of data. 
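The tensor_slice_reader.cc hunks above replace constructors that CHECK-fail on malformed protos (`TensorShape`, `TensorSlice`, `Tensor`) with status-returning factories (`BuildTensorShapeBase`, `BuildTensorSlice`, `BuildTensor`), so a corrupt checkpoint surfaces as an error instead of aborting the process. A minimal sketch of that validated-construction pattern with hypothetical types:

```c++
#include <cstdint>
#include <string>
#include <vector>

// Stand-in for tensorflow::Status.
struct Status {
  bool ok;
  std::string msg;
};

struct Shape {
  std::vector<int64_t> dims;

  // Factory: validates before constructing, instead of a constructor
  // that crashes on bad input. Fills `out` only on success.
  static Status Build(const std::vector<int64_t>& proto_dims, Shape* out) {
    for (int64_t d : proto_dims) {
      if (d < 0) return {false, "negative dimension in checkpoint"};
    }
    out->dims = proto_dims;
    return {true, ""};
  }
};

// Usage: Shape s; if (!Shape::Build(proto_dims, &s).ok) { /* reject file */ }
```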
+ TensorShape shp_s; + Status s = slice_s.SliceTensorShape(tss->shape(), &shp_s); + if (!s.ok()) { + VLOG(1) << "Failed to slice tensor " << name << ", slice " + << slice_s.DebugString() << ": " << s; + return false; + } + if (checkpoint::TensorProtoDataSize(sts.data().data()) != + shp_s.num_elements()) { + VLOG(1) << "Tensor " << name << ", slice " << slice_s.DebugString() + << " had an unexpected amount of data: expected = " + << shp_s.num_elements() << ", got = " + << checkpoint::TensorProtoDataSize(sts.data().data()); + return false; + } CopyDataFromTensorSliceToTensorSlice( tss->shape(), slice_s, slice, checkpoint::TensorProtoData(sts.data().data()), data); diff --git a/tensorflow/core/util/tensor_slice_reader_test.cc b/tensorflow/core/util/tensor_slice_reader_test.cc index fe617e8e30d67e..eb853886071541 100644 --- a/tensorflow/core/util/tensor_slice_reader_test.cc +++ b/tensorflow/core/util/tensor_slice_reader_test.cc @@ -13,15 +13,20 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include - #include "tensorflow/core/util/tensor_slice_reader.h" +#include +#include + +#include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/io/iterator.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/io/table.h" +#include "tensorflow/core/lib/io/table_builder.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" @@ -30,6 +35,7 @@ limitations under the License. #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/version.h" +#include "tensorflow/core/util/saved_tensor_slice.pb.h" #include "tensorflow/core/util/saved_tensor_slice_util.h" #include "tensorflow/core/util/tensor_slice_reader_cache.h" #include "tensorflow/core/util/tensor_slice_writer.h" @@ -309,6 +315,177 @@ TEST_SIMPLE_INT(int16, int32) TEST_SIMPLE_INT(int8, int32) TEST_SIMPLE_INT(uint8, int32) +// Modifies the SavedTensorSlices messages in a checkpoint to allow creating +// malformed or unsupported checkpoints. +void MutateSavedTensorSlices( + const std::string& fname, + const std::function& mutator) { + table::Options options; + options.compression = table::kNoCompression; + + // Read all entries from the table. + std::vector> entries; + { + std::unique_ptr file; + TF_CHECK_OK(Env::Default()->NewRandomAccessFile(fname, &file)); + uint64 file_size; + TF_CHECK_OK(Env::Default()->GetFileSize(fname, &file_size)); + table::Table* t; + TF_CHECK_OK(table::Table::Open(options, file.get(), file_size, &t)); + std::unique_ptr table(t); + std::unique_ptr it(table->NewIterator()); + for (it->Seek(""); it->Valid(); it->Next()) { + entries.emplace_back(it->key(), it->value()); + } + TF_CHECK_OK(it->status()); + } + + // Rewrite the table, mutating each value.
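The CopySliceData guard just added compares the element count actually stored in the checkpoint proto (via the new `TensorProtoDataSize<T>` specializations from saved_tensor_slice_util.h, where complex types count two stored scalars per element) against the count implied by the slice's shape, and bails out rather than reading past the buffer. The comparison in isolation, as a plain-vector sketch with hypothetical names:

```c++
#include <cstdint>
#include <vector>

// True iff the proto-side buffer holds exactly as many elements as the
// slice shape promises; anything else indicates a truncated or padded
// checkpoint entry that must not be copied from.
template <typename T>
bool SliceDataIsComplete(const std::vector<T>& proto_data,
                         const std::vector<int64_t>& slice_shape) {
  int64_t expected = 1;
  for (int64_t d : slice_shape) expected *= d;
  return static_cast<int64_t>(proto_data.size()) == expected;
}
```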
+ { + std::unique_ptr file; + TF_CHECK_OK(Env::Default()->NewWritableFile(fname, &file)); + table::TableBuilder builder(options, file.get()); + for (const auto& entry : entries) { + SavedTensorSlices sts; + CHECK(sts.ParseFromString(entry.second)); + builder.Add(entry.first, mutator(std::move(sts))); + } + TF_CHECK_OK(builder.Finish()); + TF_CHECK_OK(file->Close()); + } +} + +TEST(TensorSliceReaderTest, MissingTensorType) { + const string fname = io::JoinPath(testing::TmpDir(), "invalid_checkpoint"); + TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder); + const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + TensorShape shape({4, 5}); + TensorSlice slice = TensorSlice::ParseOrDie("0,2:-"); + TF_CHECK_OK(writer.Add("test", shape, slice, data)); + TF_CHECK_OK(writer.Finish()); + + MutateSavedTensorSlices(fname, [](SavedTensorSlices sts) { + if (sts.has_meta()) { + for (auto& tensor : *sts.mutable_meta()->mutable_tensor()) { + tensor.clear_type(); + } + } + return sts.SerializeAsString(); + }); + + TensorSliceReader reader(fname, OpenTableTensorSliceReader); + TF_CHECK_OK(reader.status()); + + // The tensor should be present, but loading it should fail due to the + // unset (invalid) type. + EXPECT_TRUE(reader.HasTensor("test", nullptr, nullptr)); + std::unique_ptr tensor; + EXPECT_FALSE(reader.GetTensor("test", &tensor).ok()); +} + +TEST(TensorSliceReaderTest, UnsupportedTensorType) { + const string fname = io::JoinPath(testing::TmpDir(), "int32_ref_checkpoint"); + TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder); + const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + TensorShape shape({4, 5}); + TensorSlice slice = TensorSlice::ParseOrDie("0,2:-"); + TF_CHECK_OK(writer.Add("test", shape, slice, data)); + TF_CHECK_OK(writer.Finish()); + + MutateSavedTensorSlices(fname, [](SavedTensorSlices sts) { + if (sts.has_meta()) { + for (auto& tensor : *sts.mutable_meta()->mutable_tensor()) { + tensor.set_type(DT_INT32_REF); + } + } + return sts.SerializeAsString(); + }); + + TensorSliceReader reader(fname, OpenTableTensorSliceReader); + TF_CHECK_OK(reader.status()); + + // The tensor should be present, but loading it should fail due to the + // unsupported type. + EXPECT_TRUE(reader.HasTensor("test", nullptr, nullptr)); + std::unique_ptr tensor; + EXPECT_FALSE(reader.GetTensor("test", &tensor).ok()); +} + +TEST(TensorSliceReaderTest, NegativeTensorShapeDimension) { + const string fname = + io::JoinPath(testing::TmpDir(), "negative_dim_checkpoint"); + TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder); + const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + TF_CHECK_OK(writer.Add("test", TensorShape({4, 5}), + TensorSlice::ParseOrDie("0,2:-"), data)); + TF_CHECK_OK(writer.Finish()); + + MutateSavedTensorSlices(fname, [](SavedTensorSlices sts) { + if (sts.has_meta()) { + for (auto& tensor : *sts.mutable_meta()->mutable_tensor()) { + for (auto& dim : *tensor.mutable_shape()->mutable_dim()) { + dim.set_size(-dim.size()); + } + } + } + return sts.SerializeAsString(); + }); + + TensorSliceReader reader(fname, OpenTableTensorSliceReader); + // The negative dimension should cause loading to fail. 
+ EXPECT_FALSE(reader.status().ok()); +} + +TEST(TensorSliceReaderTest, InvalidTensorSlice) { + const string fname = + io::JoinPath(testing::TmpDir(), "invalid_slice_checkpoint"); + TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder); + const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + TF_CHECK_OK(writer.Add("test", TensorShape({4, 5}), + TensorSlice::ParseOrDie("0,2:-"), data)); + TF_CHECK_OK(writer.Finish()); + + MutateSavedTensorSlices(fname, [](SavedTensorSlices sts) { + if (sts.has_meta()) { + for (auto& tensor : *sts.mutable_meta()->mutable_tensor()) { + tensor.mutable_slice(0)->mutable_extent(0)->set_length(-10); + } + } + return sts.SerializeAsString(); + }); + + TensorSliceReader reader(fname, OpenTableTensorSliceReader); + // The negative extent length should cause loading to fail. + EXPECT_FALSE(reader.status().ok()); +} + +TEST(TensorSliceReaderTest, MissingTensorData) { + const string fname = + io::JoinPath(testing::TmpDir(), "missing_data_checkpoint"); + TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder); + const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + TF_ASSERT_OK(writer.Add("test", TensorShape({4, 5}), + TensorSlice::ParseOrDie("0,2:-"), data)); + TF_ASSERT_OK(writer.Finish()); + + MutateSavedTensorSlices(fname, [&](SavedTensorSlices sts) { + if (sts.has_data()) { + // Replace the data with only 4 elements. + Fill(data, 4, sts.mutable_data()->mutable_data()); + } + return sts.SerializeAsString(); + }); + + TensorSliceReader reader(fname, OpenTableTensorSliceReader); + TF_ASSERT_OK(reader.status()); + + // The tensor should be present, but loading it should fail due to the missing + // data. + EXPECT_TRUE(reader.HasTensor("test", nullptr, nullptr)); + std::unique_ptr tensor; + EXPECT_FALSE(reader.GetTensor("test", &tensor).ok()); +} + void CachedTensorSliceReaderTesterHelper( const TensorSliceWriter::CreateBuilderFunction& create_function, const TensorSliceReader::OpenTableFunction& open_function) { diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD index 597f81194cdbc2..dcb3368313f931 100644 --- a/tensorflow/lite/BUILD +++ b/tensorflow/lite/BUILD @@ -544,6 +544,7 @@ cc_test( "testdata/test_min_runtime.bin", "testdata/test_model.bin", "testdata/test_model_broken.bin", + "testdata/unsupported_recursion.bin", ], tags = [ "tflite_not_portable", diff --git a/tensorflow/lite/c/common.c b/tensorflow/lite/c/common.c index 0264f420b12277..49fd54e469f033 100644 --- a/tensorflow/lite/c/common.c +++ b/tensorflow/lite/c/common.c @@ -43,8 +43,10 @@ int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size, #ifndef TF_LITE_STATIC_MEMORY TfLiteIntArray* TfLiteIntArrayCreate(int size) { - TfLiteIntArray* ret = - (TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size)); + int alloc_size = TfLiteIntArrayGetSizeInBytes(size); + if (alloc_size <= 0) return NULL; + TfLiteIntArray* ret = (TfLiteIntArray*)malloc(alloc_size); + if (!ret) return ret; ret->size = size; return ret; } diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc index cece4ecba8750f..db21968812f900 100644 --- a/tensorflow/lite/core/subgraph.cc +++ b/tensorflow/lite/core/subgraph.cc @@ -151,6 +151,41 @@ TfLiteStatus ValidateCustomAllocationForTensor( TF_LITE_ENSURE(context, data_ptr_value % kDefaultTensorAlignment == 0); return kTfLiteOk; } +// A utility test to detect if the subgraph is abused: +// 1. Detects if recursion exists in the graph (recursion is not currently +// supported). +// 2.
Detects if the interpreter / subgraph is used in multiple subgraphs. +// Note: It's clearly documented that the interpreter / subgraph are not +// thread-safe. This serves as a check with possible false negatives +// unless we switch to atomic boolean flags. +class SubgraphGuard { + public: + SubgraphGuard(TfLiteContext* context, bool* is_subgraph_in_use) + : is_subgraph_in_use_(is_subgraph_in_use) { + if (*is_subgraph_in_use_) { + TF_LITE_KERNEL_LOG( + context, + "Subgraph is already in use. Using an interpreter or a subgraph in " + "multiple threads is not supported. Recursion in the graph is not " + "supported."); + status_ = kTfLiteError; + } else { + *is_subgraph_in_use_ = true; + } + } + ~SubgraphGuard() { + // If the original status was OK, reset the boolean flag. + if (status_ == kTfLiteOk) { + *is_subgraph_in_use_ = false; + } + } + + TfLiteStatus status() const { return status_; } + + private: + TfLiteStatus status_ = kTfLiteOk; + bool* is_subgraph_in_use_; +}; } // namespace @@ -652,6 +687,7 @@ TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims, TfLiteStatus Subgraph::AllocateTensors() { TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), "AllocateTensors"); + if (!consistent_) { ReportError("AllocateTensors() called on inconsistent model."); return kTfLiteError; @@ -675,6 +711,12 @@ TfLiteStatus Subgraph::AllocateTensors() { return kTfLiteOk; } + // Note `AllocateTensors` sometimes calls itself recursively above + // for delegates. Therefore only the logic below needs to be guarded + // by `SubgraphGuard`. + SubgraphGuard guard(&context_, &is_subgraph_in_use_); + TF_LITE_ENSURE_OK(&context_, guard.status()); + next_execution_plan_index_to_prepare_ = 0; next_execution_plan_index_to_plan_allocation_ = 0; next_original_execution_plan_index_to_prepare_ = 0; @@ -977,6 +1019,9 @@ TfLiteStatus Subgraph::PrepareOpsAndTensors() { } TfLiteStatus Subgraph::Invoke() { + SubgraphGuard guard(&context_, &is_subgraph_in_use_); + TF_LITE_ENSURE_OK(&context_, guard.status()); + if (!consistent_) { ReportError("Invoke called on model that is not consistent."); return kTfLiteError; @@ -1033,10 +1078,17 @@ TfLiteStatus Subgraph::Invoke() { TF_LITE_ENSURE_STATUS(EnsureTensorDataIsReadable(tensor_index)); } if (tensor->data.raw == nullptr && tensor->bytes > 0) { - if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1) { + if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1 && + tensor->dims->size != 1) { // In general, having a tensor here with no buffer will be an error. - // However, for the reshape operator, the second input tensor is only - // used for the shape, not for the data. Thus, null buffer is ok. + // However, for the reshape operator, the second input tensor is + // sometimes only used for the shape, not for the data. Thus, null + // buffer is ok in this situation. + // The situation where null buffer is not ok for reshape operator is + // only when there are 2 inputs given to the node and the one + // corresponding to the shape (i == 1) is a vector that contains all + // dimensions. See `GetOutputShape()` function in + // `tensorflow/lite/kernels/reshape.cc` continue; } else { // In all other cases, we need to return an error as otherwise we will diff --git a/tensorflow/lite/core/subgraph.h b/tensorflow/lite/core/subgraph.h index ed3b55ce630978..a3ae0f32e47640 100644 --- a/tensorflow/lite/core/subgraph.h +++ b/tensorflow/lite/core/subgraph.h @@ -741,6 +741,10 @@ class Subgraph { // A map of resources.
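`SubgraphGuard`, introduced above, is an RAII reentrancy check: the constructor fails if the in-use flag is already set and otherwise sets it; the destructor clears the flag only when this guard was the one to set it. A generic, self-contained sketch of the same idea (the real guard additionally reports through `TfLiteContext`, and, as the patch's own comment notes, a plain bool can miss races that an atomic would catch):

```c++
// RAII guard flagging re-entrant use of a non-reentrant resource.
// Simplified restatement, not the TFLite class.
struct ReentryGuard {
  explicit ReentryGuard(bool* in_use) : in_use_(in_use) {
    if (*in_use_) {
      ok_ = false;  // recursion or concurrent use detected
    } else {
      *in_use_ = true;
      owned_ = true;
    }
  }
  ~ReentryGuard() {
    if (owned_) *in_use_ = false;  // release only if this guard acquired it
  }
  bool ok() const { return ok_; }

 private:
  bool* in_use_;
  bool ok_ = true;
  bool owned_ = false;
};

// Usage, mirroring Subgraph::Invoke():
//   ReentryGuard guard(&is_in_use_);
//   if (!guard.ok()) return /* error: already in use */;
```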
Owned by interpreter and shared by multiple subgraphs. resource::ResourceMap* resources_ = nullptr; + + // Whether the subgraph is currently in use (e.g. running the `Invoke` + // or `AllocateTensors` functions). + bool is_subgraph_in_use_ = false; }; } // namespace tflite diff --git a/tensorflow/lite/kernels/arg_min_max.cc b/tensorflow/lite/kernels/arg_min_max.cc index f782f94a9b0057..3719d816711ed0 100644 --- a/tensorflow/lite/kernels/arg_min_max.cc +++ b/tensorflow/lite/kernels/arg_min_max.cc @@ -42,6 +42,9 @@ TfLiteStatus ResizeOutput(TfLiteContext* context, const TfLiteTensor* input, axis_value += NumDimensions(input); } + TF_LITE_ENSURE(context, axis_value >= 0); + TF_LITE_ENSURE(context, axis_value < NumDimensions(input)); + // Copy the input dimensions to output except the axis dimension. TfLiteIntArray* output_dims = TfLiteIntArrayCreate(NumDimensions(input) - 1); int j = 0; diff --git a/tensorflow/lite/kernels/batch_to_space_nd.cc b/tensorflow/lite/kernels/batch_to_space_nd.cc index 9d6492e0fcbf06..044ac1b3a5ee5d 100644 --- a/tensorflow/lite/kernels/batch_to_space_nd.cc +++ b/tensorflow/lite/kernels/batch_to_space_nd.cc @@ -78,6 +78,7 @@ TfLiteStatus ResizeOutputTensor(TfLiteContext* context, int output_batch_size = input_size->data[0]; for (int dim = 0; dim < spatial_dims_num; ++dim) { // Number of batch must be multiple of (block_shape[dim]). + TF_LITE_ENSURE(context, block_shape[dim] != 0); TF_LITE_ENSURE_EQ(context, output_batch_size % block_shape[dim], 0); output_batch_size = output_batch_size / block_shape[dim]; output_size->data[dim + 1] = input_size->data[dim + 1] * block_shape[dim] - diff --git a/tensorflow/lite/kernels/concatenation.cc b/tensorflow/lite/kernels/concatenation.cc index 01f7f9fcc48d85..73df086322a88b 100644 --- a/tensorflow/lite/kernels/concatenation.cc +++ b/tensorflow/lite/kernels/concatenation.cc @@ -16,6 +16,8 @@ limitations under the License. #include +#include + #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/compatibility.h" @@ -71,6 +73,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, t->type, input_type); for (int d = 0; d < t0->dims->size; ++d) { if (d == axis) { + // Avoid integer overflow in sum_axis below + TF_LITE_ENSURE(context, t->dims->data[axis] >= 0); + TF_LITE_ENSURE(context, t->dims->data[axis] <= + std::numeric_limits::max() - sum_axis); sum_axis += t->dims->data[axis]; } else { TF_LITE_ENSURE_EQ(context, t->dims->data[d], t0->dims->data[d]); diff --git a/tensorflow/lite/kernels/conv.cc b/tensorflow/lite/kernels/conv.cc index 5c978f8dbfb39a..6485d69f536d8b 100644 --- a/tensorflow/lite/kernels/conv.cc +++ b/tensorflow/lite/kernels/conv.cc @@ -521,6 +521,7 @@ TfLiteStatus Prepare(KernelType kernel_type, TfLiteContext* context, // Only one scale factor per batch is typically necessary. See optimized // implementation for why we need to allocate for the height of the inputs // flattened to 2D. + TF_LITE_ENSURE(context, channels_in != 0); const int height = NumElements(input) / channels_in; int scaling_dims[1] = {height}; if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { @@ -563,6 +564,7 @@ TfLiteStatus Prepare(KernelType kernel_type, TfLiteContext* context, input_offsets->type = kTfLiteInt32; input_offsets->allocation_type = kTfLiteArenaRw; // See above comment for the need to allocate for height of inputs. 
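The concatenation hunk above guards the running `sum_axis` accumulator against signed-integer overflow before adding each input's axis dimension. The same idiom in isolation (illustrative helper, not the kernel code):

```c++
#include <limits>

// Add `value` into `*sum` only if the result cannot overflow int.
// Assumes *sum >= 0, as in the kernel where the accumulator starts at 0.
bool SafeAccumulate(int value, int* sum) {
  if (value < 0) return false;  // negative dimensions are invalid
  if (value > std::numeric_limits<int>::max() - *sum) return false;  // overflow
  *sum += value;
  return true;
}
```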
+ TF_LITE_ENSURE(context, channels_in != 0); const int height = NumElements(input) / channels_in; const int input_offset_dims[1] = {height}; if (!TfLiteIntArrayEqualsArray(input_offsets->dims, 1, @@ -827,8 +829,9 @@ TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node, CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); - const int input_size = NumElements(input) / SizeOfDimension(input, 0); const int batch_size = SizeOfDimension(input, 0); + TF_LITE_ENSURE(context, batch_size != 0); + const int input_size = NumElements(input) / batch_size; TfLiteTensor* quantized_input_tensor; TF_LITE_ENSURE_OK(context, GetTemporarySafe(context, node, data->input_quantized_index, @@ -921,8 +924,9 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node, CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); - const int input_size = NumElements(input) / SizeOfDimension(input, 0); const int batch_size = SizeOfDimension(input, 0); + TF_LITE_ENSURE(context, batch_size != 0); + const int input_size = NumElements(input) / batch_size; const float* input_ptr = GetTensorData(input); TfLiteTensor* quantized_input_tensor; diff --git a/tensorflow/lite/kernels/depth_to_space.cc b/tensorflow/lite/kernels/depth_to_space.cc index d61049f85a9adb..9b552541c74b7b 100644 --- a/tensorflow/lite/kernels/depth_to_space.cc +++ b/tensorflow/lite/kernels/depth_to_space.cc @@ -61,6 +61,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); const int block_size = params->block_size; + TF_LITE_ENSURE(context, block_size > 0); const int input_height = input->dims->data[1]; const int input_width = input->dims->data[2]; const int input_channels = input->dims->data[3]; diff --git a/tensorflow/lite/kernels/depth_to_space_test.cc b/tensorflow/lite/kernels/depth_to_space_test.cc index 4429faf9909178..c03512dd710ad7 100644 --- a/tensorflow/lite/kernels/depth_to_space_test.cc +++ b/tensorflow/lite/kernels/depth_to_space_test.cc @@ -60,6 +60,11 @@ TEST(DepthToSpaceOpModel, BadBlockSize) { EXPECT_DEATH(DepthToSpaceOpModel({TensorType_FLOAT32, {1, 1, 1, 4}}, 4), "Cannot allocate tensors"); } + +TEST(DepthToSpaceOpModel, NoBlockSize) { + EXPECT_DEATH(DepthToSpaceOpModel({TensorType_FLOAT32, {1, 1, 1, 4}}, 0), + "Cannot allocate tensors"); +} #endif TEST(DepthToSpaceOpModel, Float32) { diff --git a/tensorflow/lite/kernels/depthwise_conv.cc b/tensorflow/lite/kernels/depthwise_conv.cc index a76853da190e23..c83a042af3e7e6 100644 --- a/tensorflow/lite/kernels/depthwise_conv.cc +++ b/tensorflow/lite/kernels/depthwise_conv.cc @@ -176,6 +176,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { if (data_type != kTfLiteFloat32) { TF_LITE_ENSURE_EQ(context, filter->quantization.type, kTfLiteAffineQuantization); + TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization); const auto* affine_quantization = reinterpret_cast( filter->quantization.params); @@ -195,6 +196,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } if (is_hybrid) { + TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization); const auto* affine_quantization = reinterpret_cast( filter->quantization.params); @@ -285,8 +287,8 @@ TfLiteStatus ComputeDepthMultiplier(TfLiteContext* context, int16* depth_multiplier) { int num_filter_channels = SizeOfDimension(filter, 3); int num_input_channels = SizeOfDimension(input, 3); + 
TF_LITE_ENSURE(context, num_input_channels != 0); TF_LITE_ENSURE_EQ(context, num_filter_channels % num_input_channels, 0); - *depth_multiplier = num_filter_channels / num_input_channels; return kTfLiteOk; } @@ -455,8 +457,9 @@ TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node, float output_activation_min, output_activation_max; CalculateActivationRange(params->activation, &output_activation_min, &output_activation_max); - const int input_size = NumElements(input) / SizeOfDimension(input, 0); const int batch_size = SizeOfDimension(input, 0); + TF_LITE_ENSURE(context, batch_size != 0); + const int input_size = NumElements(input) / batch_size; TfLiteTensor* input_quantized; TF_LITE_ENSURE_OK(context, GetTemporarySafe(context, node, data->input_quantized_index, &input_quantized)); @@ -494,6 +497,7 @@ TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node, op_params.weights_offset = 0; op_params.float_activation_min = output_activation_min; op_params.float_activation_max = output_activation_max; + TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization); const auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params); if (kernel_type == kReference) { diff --git a/tensorflow/lite/kernels/div.cc b/tensorflow/lite/kernels/div.cc index f744b4ba1b7f63..51623a969d1b11 100644 --- a/tensorflow/lite/kernels/div.cc +++ b/tensorflow/lite/kernels/div.cc @@ -216,9 +216,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, kOutputTensor, &output)); - if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) { + // TODO(b/193904910): This can be written with C++ templates +#define TF_LITE_CHECK_DIV_NON_ZERO(data_type) \ + const auto* input2_data = GetTensorData<data_type>(input2); \ + const size_t input2_elements = input2->bytes / sizeof(data_type); \ + for (size_t i = 0; i < input2_elements; i++) { \ + TF_LITE_ENSURE(context, input2_data[i] != 0); \ + } + + if (output->type == kTfLiteFloat32) { + // Div by zero seems ok in this case; just like in TF, infinities are + // returned. So we don't do a check at this point.
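TF_LITE_CHECK_DIV_NON_ZERO above scans every element of the denominator tensor before the integer paths run, while the float path is left unchecked because IEEE-754 division by zero yields infinities, matching TF behavior. The TODO suggests templates; a hedged sketch of what that replacement could look like, with a hypothetical name:

#include <cstddef>

// Possible templated form of TF_LITE_CHECK_DIV_NON_ZERO (sketch only).
template <typename T>
bool AllNonZero(const T* data, std::size_t count) {
  for (std::size_t i = 0; i < count; ++i) {
    if (data[i] == 0) return false;  // One zero denominator fails the op.
  }
  return true;
}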
+ EvalDiv(context, node, params, data, input1, input2, output); + } else if (output->type == kTfLiteInt32) { + TF_LITE_CHECK_DIV_NON_ZERO(int32_t); EvalDiv(context, node, params, data, input1, input2, output); } else if (output->type == kTfLiteUInt8) { + TF_LITE_CHECK_DIV_NON_ZERO(uint8_t); TF_LITE_ENSURE_OK( context, EvalQuantized(context, node, params, data, input1, input2, output)); @@ -229,6 +243,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { output->type); return kTfLiteError; } +#undef TF_LITE_CHECK_DIV_NON_ZERO return kTfLiteOk; } diff --git a/tensorflow/lite/kernels/embedding_lookup.cc b/tensorflow/lite/kernels/embedding_lookup.cc index d865f69eb9bd3c..092868d5a6a745 100644 --- a/tensorflow/lite/kernels/embedding_lookup.cc +++ b/tensorflow/lite/kernels/embedding_lookup.cc @@ -71,6 +71,10 @@ TfLiteStatus EvalSimple(TfLiteContext* context, TfLiteNode* node, const TfLiteTensor* lookup, const TfLiteTensor* value, TfLiteTensor* output) { const int row_size = SizeOfDimension(value, 0); + if (row_size == 0) { + // Propagate empty tensor if input is empty + return kTfLiteOk; + } const int row_bytes = value->bytes / row_size; char* output_raw = GetTensorData(output); diff --git a/tensorflow/lite/kernels/embedding_lookup_sparse.cc b/tensorflow/lite/kernels/embedding_lookup_sparse.cc index e9ad7e50cf1337..4ad1054340c9c3 100644 --- a/tensorflow/lite/kernels/embedding_lookup_sparse.cc +++ b/tensorflow/lite/kernels/embedding_lookup_sparse.cc @@ -173,6 +173,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // Resize output tensor. TfLiteIntArray* output_shape = TfLiteIntArrayCreate(output_rank); + TF_LITE_ENSURE(context, output_shape != nullptr); int k = 0; int embedding_size = 1; int lookup_size = 1; diff --git a/tensorflow/lite/kernels/expand_dims.cc b/tensorflow/lite/kernels/expand_dims.cc index 950131c8d693b8..8398e733a47072 100644 --- a/tensorflow/lite/kernels/expand_dims.cc +++ b/tensorflow/lite/kernels/expand_dims.cc @@ -38,6 +38,7 @@ TfLiteStatus ExpandTensorDim(TfLiteContext* context, const TfLiteTensor& input, axis = input_dims.size + 1 + axis; } TF_LITE_ENSURE(context, axis <= input_dims.size); + TF_LITE_ENSURE(context, axis >= 0); TfLiteIntArray* output_dims = TfLiteIntArrayCreate(input_dims.size + 1); for (int i = 0; i < output_dims->size; ++i) { diff --git a/tensorflow/lite/kernels/fully_connected.cc b/tensorflow/lite/kernels/fully_connected.cc index 1ba3932b476b3c..97207b513d439f 100644 --- a/tensorflow/lite/kernels/fully_connected.cc +++ b/tensorflow/lite/kernels/fully_connected.cc @@ -180,6 +180,7 @@ TfLiteStatus PrepareImpl(TfLiteContext* context, TfLiteNode* node) { } TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 2); + TF_LITE_ENSURE(context, filter->dims->data[1] != 0); const int batch_size = input_size / filter->dims->data[1]; const int num_units = filter->dims->data[0]; diff --git a/tensorflow/lite/kernels/gather.cc b/tensorflow/lite/kernels/gather.cc index 7ec53edb062741..676381d0b3cdc5 100644 --- a/tensorflow/lite/kernels/gather.cc +++ b/tensorflow/lite/kernels/gather.cc @@ -104,8 +104,20 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } template -TfLiteStatus Gather(const TfLiteGatherParams& params, const TfLiteTensor* input, - const TfLiteTensor* positions, TfLiteTensor* output) { +TfLiteStatus Gather(TfLiteContext* context, const TfLiteGatherParams& params, + const TfLiteTensor* input, const TfLiteTensor* positions, + TfLiteTensor* output) { + const PositionsT* indexes = GetTensorData(positions); + bool 
indices_has_only_positive_elements = true; + const size_t num_indices = positions->bytes / sizeof(PositionsT); + for (size_t i = 0; i < num_indices; i++) { + if (indexes[i] < 0) { + indices_has_only_positive_elements = false; + break; + } + } + TF_LITE_ENSURE(context, indices_has_only_positive_elements); + tflite::GatherParams op_params; op_params.axis = params.axis; optimized_ops::Gather(op_params, GetTensorShape(input), @@ -120,7 +132,18 @@ TfLiteStatus GatherStrings(TfLiteContext* context, const TfLiteTensor* input, const TfLiteTensor* positions, TfLiteTensor* output) { DynamicBuffer buffer; + const PositionT* indexes = GetTensorData(positions); + bool indices_has_only_positive_elements = true; + const size_t num_indices = positions->bytes / sizeof(PositionT); + for (size_t i = 0; i < num_indices; i++) { + if (indexes[i] < 0) { + indices_has_only_positive_elements = false; + break; + } + } + TF_LITE_ENSURE(context, indices_has_only_positive_elements); + const PositionT num_strings = GetStringCount(input); const int num_indexes = NumElements(positions); @@ -149,19 +172,26 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { if (positions->type == kTfLiteInt32) { switch (input->type) { case kTfLiteFloat32: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteUInt8: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt8: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt16: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt32: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt64: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteBool: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteString: return GatherStrings(context, input, positions, output); default: @@ -173,19 +203,26 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { if (positions->type == kTfLiteInt64) { switch (input->type) { case kTfLiteFloat32: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteUInt8: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt8: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt16: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt32: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteInt64: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteBool: - return Gather(*params, input, positions, output); + return Gather(context, *params, input, positions, + output); case kTfLiteString: return GatherStrings(context, input, positions, output); default: diff --git a/tensorflow/lite/kernels/gather_nd.cc b/tensorflow/lite/kernels/gather_nd.cc index 836a9ffd450414..3ea2c47d3d3ffe 100644 --- 
a/tensorflow/lite/kernels/gather_nd.cc +++ b/tensorflow/lite/kernels/gather_nd.cc @@ -122,6 +122,17 @@ TfLiteStatus GatherNdString(const TfLiteTensor* params, template TfLiteStatus EvalGatherNd(TfLiteContext* context, const TfLiteTensor* params, const TfLiteTensor* indices, TfLiteTensor* output) { + bool indices_has_only_positive_elements = true; + const auto* indices_values = GetTensorData(indices); + const size_t num_indices = indices->bytes / sizeof(IndicesT); + for (size_t i = 0; i < num_indices; i++) { + if (indices_values[i] < 0) { + indices_has_only_positive_elements = false; + break; + } + } + TF_LITE_ENSURE(context, indices_has_only_positive_elements); + switch (params->type) { case kTfLiteFloat32: return GatherNd(params, indices, output); @@ -152,6 +163,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, kOutputTensor, &output)); + // Prevent division by 0 in the helper + TF_LITE_ENSURE(context, NumElements(params) > 0); + switch (indices->type) { case kTfLiteInt32: return EvalGatherNd(context, params, indices, output); diff --git a/tensorflow/lite/kernels/hashtable_lookup.cc b/tensorflow/lite/kernels/hashtable_lookup.cc index 2563d8ade5f74e..0de24b11333c97 100644 --- a/tensorflow/lite/kernels/hashtable_lookup.cc +++ b/tensorflow/lite/kernels/hashtable_lookup.cc @@ -112,6 +112,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 2, &value)); const int num_rows = SizeOfDimension(value, 0); + TF_LITE_ENSURE(context, num_rows != 0); const int row_bytes = value->bytes / num_rows; void* pointer = nullptr; DynamicBuffer buf; diff --git a/tensorflow/lite/kernels/internal/averagepool_quantized_test.cc b/tensorflow/lite/kernels/internal/averagepool_quantized_test.cc index cbc863645b74b9..fea343ae6b8824 100644 --- a/tensorflow/lite/kernels/internal/averagepool_quantized_test.cc +++ b/tensorflow/lite/kernels/internal/averagepool_quantized_test.cc @@ -40,12 +40,14 @@ void RunOneAveragePoolTest(const PoolParams& params, std::vector optimized_averagePool_output(buffer_size); std::vector reference_averagePool_output(buffer_size); - reference_integer_ops::AveragePool(params, input_shape, input_data, - output_shape, - reference_averagePool_output.data()); - optimized_integer_ops::AveragePool(params, input_shape, input_data, - output_shape, - optimized_averagePool_output.data()); + bool reference_success = reference_integer_ops::AveragePool( + params, input_shape, input_data, output_shape, + reference_averagePool_output.data()); + bool optimized_success = optimized_integer_ops::AveragePool( + params, input_shape, input_data, output_shape, + optimized_averagePool_output.data()); + EXPECT_TRUE(reference_success); + EXPECT_TRUE(optimized_success); for (int i = 0; i < buffer_size; i++) { EXPECT_TRUE(reference_averagePool_output[i] == diff --git a/tensorflow/lite/kernels/internal/optimized/integer_ops/pooling.h b/tensorflow/lite/kernels/internal/optimized/integer_ops/pooling.h index f2696500ab9874..dfe8bd9b545fc6 100644 --- a/tensorflow/lite/kernels/internal/optimized/integer_ops/pooling.h +++ b/tensorflow/lite/kernels/internal/optimized/integer_ops/pooling.h @@ -145,7 +145,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, } } -inline void AveragePool16(const PoolParams& params, +inline bool AveragePool16(const PoolParams& params, const RuntimeShape& input_shape, const int8* input_data, const RuntimeShape& output_shape, int8* 
output_data) { @@ -194,6 +194,7 @@ inline void AveragePool16(const PoolParams& params, std::min(params.filter_height, input_height - in_y_origin); const int filter_count = (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start); + if (filter_count == 0) return false; memset(acc, 0, tranche_depth * sizeof(acc[0])); const int8* input_ptr = input_data + depth_base + @@ -281,16 +282,18 @@ inline void AveragePool16(const PoolParams& params, } } } + return true; } -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const int8* input_data, const RuntimeShape& output_shape, int8* output_data) { if (params.filter_height * params.filter_width > 16 * 16) { - reference_integer_ops::AveragePool(params, input_shape, input_data, - output_shape, output_data); + return reference_integer_ops::AveragePool(params, input_shape, input_data, + output_shape, output_data); } else { - AveragePool16(params, input_shape, input_data, output_shape, output_data); + return AveragePool16(params, input_shape, input_data, output_shape, + output_data); } } diff --git a/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h index f206dfa9235428..0f1c50329c733d 100644 --- a/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h @@ -3763,7 +3763,7 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, output_data, output_dims); } -inline void AveragePool(const float* input_data, const Dims<4>& input_dims, +inline bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int kwidth, int kheight, float output_activation_min, @@ -3778,35 +3778,37 @@ inline void AveragePool(const float* input_data, const Dims<4>& input_dims, params.padding_values.width = pad_width; params.float_activation_min = output_activation_min; params.float_activation_max = output_activation_max; - AveragePool(params, DimsToShape(input_dims), input_data, - DimsToShape(output_dims), output_data); + return AveragePool(params, DimsToShape(input_dims), input_data, + DimsToShape(output_dims), output_data); } // legacy, for compatibility with old checked-in code template -void AveragePool(const float* input_data, const Dims<4>& input_dims, +bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int kwidth, int kheight, float* output_data, const Dims<4>& output_dims) { float output_activation_min, output_activation_max; GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, kwidth, kheight, output_activation_min, - output_activation_max, output_data, output_dims); + return AveragePool(input_data, input_dims, stride_width, stride_height, + pad_width, pad_height, kwidth, kheight, + output_activation_min, output_activation_max, output_data, + output_dims); } // legacy, for compatibility with old checked-in code template -void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, +bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, int pad_width, int pad_height, int filter_width, int filter_height, float* output_data, const Dims<4>& output_dims) { - AveragePool(input_data, 
input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_data, output_dims); + return AveragePool(input_data, input_dims, stride, stride, pad_width, + pad_height, filter_width, filter_height, output_data, + output_dims); } -inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, +inline bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, @@ -3821,13 +3823,13 @@ inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, params.padding_values.width = pad_width; params.quantized_activation_min = output_activation_min; params.quantized_activation_max = output_activation_max; - AveragePool(params, DimsToShape(input_dims), input_data, - DimsToShape(output_dims), output_data); + return AveragePool(params, DimsToShape(input_dims), input_data, + DimsToShape(output_dims), output_data); } // legacy, for compatibility with old checked-in code template -void AveragePool(const uint8* input_data, const Dims<4>& input_dims, +bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, int32 output_activation_max, @@ -3841,21 +3843,23 @@ void AveragePool(const uint8* input_data, const Dims<4>& input_dims, TFLITE_DCHECK_EQ(output_activation_min, 0); TFLITE_DCHECK_EQ(output_activation_max, 255); } - AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); + return AveragePool(input_data, input_dims, stride_width, stride_height, + pad_width, pad_height, filter_width, filter_height, + output_activation_min, output_activation_max, output_data, + output_dims); } // legacy, for compatibility with old checked-in code template -void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, +bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, int32 output_activation_max, uint8* output_data, const Dims<4>& output_dims) { - AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); + return AveragePool(input_data, input_dims, stride, stride, pad_width, + pad_height, filter_width, filter_height, + output_activation_min, output_activation_max, + output_data, output_dims); } inline void MaxPool(const float* input_data, const Dims<4>& input_dims, diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h index 6cabea11ac4289..32812ed842df17 100644 --- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h @@ -304,7 +304,7 @@ inline void BinaryBroadcastFiveFold(const ArithmeticParams& unswitched_params, // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on. input2_data_reset = input2_data_ptr; } - } else { + } else if (input1_data_ptr != nullptr) { // Special case of y4 == 1, in which the innermost loop is a single // element and can be combined with the next (y3) as an inner broadcast. 
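The narrowed else branch above makes the y4 == 1 fast path run only when input1_data_ptr is non-null; previously a null pointer could reach the combined inner-broadcast loop and be dereferenced. In isolation, the guard amounts to this (hypothetical, simplified to one dimension):

// Sketch: bail out before walking a possibly-null source pointer.
void InnerBroadcast(const float* src, float* dst, int n) {
  if (src == nullptr) return;  // Mirrors `else if (input1_data_ptr != nullptr)`.
  for (int i = 0; i < n; ++i) {
    dst[i] = src[i];  // Safe: src checked above.
  }
}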
// @@ -3223,7 +3223,7 @@ inline int NodeOffset(int b, int h, int w, int height, int width) { return (b * height + h) * width + w; } -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& output_shape, float* output_data) { @@ -3238,6 +3238,9 @@ inline void AveragePool(const PoolParams& params, const int stride_height = params.stride_height; const int stride_width = params.stride_width; + if (stride_height == 0) return false; + if (stride_width == 0) return false; + // TODO(benoitjacob) make this a proper reference impl without Eigen! const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape); auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape); @@ -3283,9 +3286,11 @@ inline void AveragePool(const PoolParams& params, params.float_activation_min, params.float_activation_max); } + + return true; } -inline void AveragePool16(const PoolParams& params, +inline bool AveragePool16(const PoolParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& output_shape, @@ -3335,6 +3340,7 @@ inline void AveragePool16(const PoolParams& params, std::min(params.filter_height, input_height - in_y_origin); const int filter_count = (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start); + if (filter_count == 0) return false; memset(acc, 0, tranche_depth * sizeof(acc[0])); const uint8* input_ptr = input_data + depth_base + @@ -3417,7 +3423,7 @@ inline void AveragePool16(const PoolParams& params, } } -inline void AveragePool32(const PoolParams& params, +inline bool AveragePool32(const PoolParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& output_shape, @@ -3467,6 +3473,7 @@ inline void AveragePool32(const PoolParams& params, std::min(params.filter_height, input_height - in_y_origin); const int filter_count = (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start); + if (filter_count == 0) return false; memset(acc, 0, tranche_depth * sizeof(acc[0])); const uint8* input_ptr = input_data + depth_base + @@ -3553,16 +3560,19 @@ inline void AveragePool32(const PoolParams& params, } } } + return true; } -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& output_shape, uint8* output_data) { if (params.filter_height * params.filter_width > 16 * 16) { - AveragePool32(params, input_shape, input_data, output_shape, output_data); + return AveragePool32(params, input_shape, input_data, output_shape, + output_data); } else { - AveragePool16(params, input_shape, input_data, output_shape, output_data); + return AveragePool16(params, input_shape, input_data, output_shape, + output_data); } } diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h b/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h index 17944bc47dd5d3..2cb4dada8a66eb 100644 --- a/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h @@ -21,7 +21,7 @@ limitations under the License. 
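Throughout these pooling changes the AveragePool family turns from void to bool so that an empty pooling window is reported to the caller instead of dividing the accumulator by zero. A worked example of how filter_count reaches zero, with illustrative values:

// With the window origin clamped against the input, a window that lies
// entirely outside the input never iterates (values are hypothetical).
int EmptyWindowExample() {
  int filter_count = 0;
  const int in_x_origin = 5, filter_width = 2, input_width = 2;
  for (int fx = 0; fx < filter_width; ++fx) {
    const int in_x = in_x_origin + fx;  // 5 and 6: both past input_width.
    if (in_x >= 0 && in_x < input_width) ++filter_count;
  }
  return filter_count;  // 0: dividing the accumulator by this was the bug.
}

The callers in pooling.cc further below wrap each AveragePool call in TF_LITE_ENSURE so that a false return propagates as kTfLiteError.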
namespace tflite { namespace reference_integer_ops { -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const int8_t* input_data, const RuntimeShape& output_shape, int8_t* output_data) { @@ -66,6 +66,7 @@ inline void AveragePool(const PoolParams& params, filter_count++; } } + if (filter_count == 0) return false; // Round to the closest integer value. acc = acc > 0 ? (acc + filter_count / 2) / filter_count : (acc - filter_count / 2) / filter_count; @@ -77,6 +78,7 @@ inline void AveragePool(const PoolParams& params, } } } + return true; } inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, @@ -136,7 +138,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, } } -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const int16_t* input_data, const RuntimeShape& output_shape, @@ -182,6 +184,7 @@ inline void AveragePool(const PoolParams& params, filter_count++; } } + if (filter_count == 0) return false; // Round to the closest integer value. acc = acc > 0 ? (acc + filter_count / 2) / filter_count : (acc - filter_count / 2) / filter_count; @@ -193,6 +196,7 @@ inline void AveragePool(const PoolParams& params, } } } + return true; } inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape, diff --git a/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h b/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h index 85d3b674c9218d..88309ea9d0ca42 100644 --- a/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h +++ b/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h @@ -1486,7 +1486,7 @@ void Sub(const T* input1_data, const Dims<4>& input1_dims, const T* input2_data, output_data); } -inline void AveragePool(const float* input_data, const Dims<4>& input_dims, +inline bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int kwidth, int kheight, float output_activation_min, @@ -1501,8 +1501,8 @@ inline void AveragePool(const float* input_data, const Dims<4>& input_dims, params.padding_values.width = pad_width; params.float_activation_min = output_activation_min; params.float_activation_max = output_activation_max; - AveragePool(params, DimsToShape(input_dims), input_data, - DimsToShape(output_dims), output_data); + return AveragePool(params, DimsToShape(input_dims), input_data, + DimsToShape(output_dims), output_data); } // Transitional version that will be moved shortly to legacy_reference_ops, as @@ -1561,29 +1561,31 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, // legacy, for compatibility with old checked-in code template -void AveragePool(const float* input_data, const Dims<4>& input_dims, +bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int kwidth, int kheight, float* output_data, const Dims<4>& output_dims) { float output_activation_min, output_activation_max; GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, kwidth, kheight, output_activation_min, - output_activation_max, output_data, output_dims); + return AveragePool(input_data, input_dims, stride_width, stride_height, + pad_width, 
pad_height, kwidth, kheight, + output_activation_min, output_activation_max, output_data, + output_dims); } // legacy, for compatibility with old checked-in code template -void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, +bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride, int pad_width, int pad_height, int filter_width, int filter_height, float* output_data, const Dims<4>& output_dims) { - AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_data, output_dims); + return AveragePool(input_data, input_dims, stride, stride, pad_width, + pad_height, filter_width, filter_height, output_data, + output_dims); } -inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, +inline bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, @@ -1598,13 +1600,13 @@ inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims, params.padding_values.width = pad_width; params.quantized_activation_min = output_activation_min; params.quantized_activation_max = output_activation_max; - AveragePool(params, DimsToShape(input_dims), input_data, - DimsToShape(output_dims), output_data); + return AveragePool(params, DimsToShape(input_dims), input_data, + DimsToShape(output_dims), output_data); } // legacy, for compatibility with old checked-in code template -void AveragePool(const uint8* input_data, const Dims<4>& input_dims, +bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride_width, int stride_height, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, int32 output_activation_max, @@ -1618,21 +1620,23 @@ void AveragePool(const uint8* input_data, const Dims<4>& input_dims, TFLITE_DCHECK_EQ(output_activation_min, 0); TFLITE_DCHECK_EQ(output_activation_max, 255); } - AveragePool(input_data, input_dims, stride_width, stride_height, pad_width, - pad_height, filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); + return AveragePool(input_data, input_dims, stride_width, stride_height, + pad_width, pad_height, filter_width, filter_height, + output_activation_min, output_activation_max, output_data, + output_dims); } // legacy, for compatibility with old checked-in code template -void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, +bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride, int pad_width, int pad_height, int filter_width, int filter_height, int32 output_activation_min, int32 output_activation_max, uint8* output_data, const Dims<4>& output_dims) { - AveragePool(input_data, input_dims, stride, stride, pad_width, pad_height, - filter_width, filter_height, output_activation_min, - output_activation_max, output_data, output_dims); + return AveragePool(input_data, input_dims, stride, stride, pad_width, + pad_height, filter_width, filter_height, + output_activation_min, output_activation_max, + output_data, output_dims); } inline void MaxPool(const float* input_data, const Dims<4>& input_dims, diff --git a/tensorflow/lite/kernels/internal/reference/pooling.h b/tensorflow/lite/kernels/internal/reference/pooling.h index 0872f5210c8edb..ee30b8404464ba 100644 --- a/tensorflow/lite/kernels/internal/reference/pooling.h +++ 
b/tensorflow/lite/kernels/internal/reference/pooling.h @@ -23,7 +23,7 @@ limitations under the License. namespace tflite { namespace reference_ops { -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& output_shape, float* output_data) { @@ -66,6 +66,7 @@ inline void AveragePool(const PoolParams& params, filter_count++; } } + if (filter_count == 0) return false; const float average = total / filter_count; output_data[Offset(output_shape, batch, out_y, out_x, channel)] = ActivationFunctionWithMinMax(average, params.float_activation_min, @@ -74,9 +75,10 @@ inline void AveragePool(const PoolParams& params, } } } + return true; } -inline void AveragePool(const PoolParams& params, +inline bool AveragePool(const PoolParams& params, const RuntimeShape& input_shape, const uint8_t* input_data, const RuntimeShape& output_shape, @@ -122,6 +124,7 @@ inline void AveragePool(const PoolParams& params, filter_count++; } } + if (filter_count == 0) return false; acc = (acc + filter_count / 2) / filter_count; acc = std::max(acc, params.quantized_activation_min); acc = std::min(acc, params.quantized_activation_max); @@ -131,6 +134,7 @@ inline void AveragePool(const PoolParams& params, } } } + return true; } inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape, diff --git a/tensorflow/lite/kernels/kernel_util.cc b/tensorflow/lite/kernels/kernel_util.cc index a834d8ab9132de..c5e23fe98e826d 100644 --- a/tensorflow/lite/kernels/kernel_util.cc +++ b/tensorflow/lite/kernels/kernel_util.cc @@ -111,6 +111,7 @@ TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node, TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, int index) { TfLiteTensor* tensor = GetMutableInput(context, node, index); + if (tensor == nullptr) return nullptr; return tensor->is_variable ? tensor : nullptr; } diff --git a/tensorflow/lite/kernels/lsh_projection.cc b/tensorflow/lite/kernels/lsh_projection.cc index 81f97ecf9a9ce7..92a5ee556f724c 100644 --- a/tensorflow/lite/kernels/lsh_projection.cc +++ b/tensorflow/lite/kernels/lsh_projection.cc @@ -28,7 +28,7 @@ limitations under the License. // // Input: // Tensor[0]: Hash functions. Dim.size == 2, DataType: Float. -// Tensor[0].Dim[0]: Num of hash functions. +// Tensor[0].Dim[0]: Num of hash functions. Must be at least 1. // Tensor[0].Dim[1]: Num of projected output bits generated by // each hash function. // In sparse case, Tensor[0].Dim[1] + ceil( log2(Tensor[0].Dim[0] )) <= 32. 
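The kernel_util.cc change above means GetVariableInput now returns nullptr both when the tensor is missing and when it is not variable, so call sites must null-check the result before dereferencing. The expected pattern, as used by the svdf.cc hunk further below:

TfLiteTensor* state = GetVariableInput(context, node, kStateTensor);
TF_LITE_ENSURE(context, state != nullptr);  // Fail the op cleanly, not crash.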
@@ -82,6 +82,7 @@ TfLiteStatus Resize(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input; TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &input)); TF_LITE_ENSURE(context, NumDimensions(input) >= 1); + TF_LITE_ENSURE(context, SizeOfDimension(input, 0) >= 1); if (NumInputs(node) == 3) { const TfLiteTensor* weight; diff --git a/tensorflow/lite/kernels/maximum_minimum.cc b/tensorflow/lite/kernels/maximum_minimum.cc index 777e51442f120e..176e020a5a8e55 100644 --- a/tensorflow/lite/kernels/maximum_minimum.cc +++ b/tensorflow/lite/kernels/maximum_minimum.cc @@ -157,35 +157,37 @@ template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { OpContext op_context(context, node); - switch (op_context.output->type) { - case kTfLiteFloat32: - TFLiteOperation(context, node, op_context); - break; - case kTfLiteUInt8: - TFLiteOperation(context, node, - op_context); - break; - case kTfLiteInt8: - TFLiteOperation(context, node, op_context); - break; - case kTfLiteInt32: - TFLiteOperation(context, node, - op_context); - break; - case kTfLiteInt64: - TFLiteOperation(context, node, - op_context); - break; - case kTfLiteInt16: - TFLiteOperation(context, node, - op_context); - break; - default: - context->ReportError(context, - "Type %d is currently not supported by Maximum.", - op_context.output->type); - return kTfLiteError; - } + // If the inputs have no elements, short-circuit. + if (NumElements(op_context.input1) == 0 || + NumElements(op_context.input2) == 0) { + return kTfLiteOk; + } + + switch (op_context.output->type) { + case kTfLiteFloat32: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteUInt8: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt8: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt32: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt64: + TFLiteOperation(context, node, op_context); + break; + case kTfLiteInt16: + TFLiteOperation(context, node, op_context); + break; + default: + context->ReportError(context, + "Type %d is currently not supported by Maximum.", + op_context.output->type); + return kTfLiteError; + } return kTfLiteOk; } diff --git a/tensorflow/lite/kernels/one_hot.cc b/tensorflow/lite/kernels/one_hot.cc index f7b4e8e7e19d57..75bfb48d6b19c8 100644 --- a/tensorflow/lite/kernels/one_hot.cc +++ b/tensorflow/lite/kernels/one_hot.cc @@ -69,6 +69,11 @@ void OneHotComputeImpl(const OneHotContext& op_context) { for (int i = 0; i < op_context.axis; ++i) { prefix_dim_size *= op_context.indices->dims->data[i]; } + if (prefix_dim_size == 0) { + // If the indices tensor is degenerate, return a degenerate tensor, just + // like TensorFlow does.
+ return; + } const int suffix_dim_size = NumElements(op_context.indices) / prefix_dim_size; const int depth = *op_context.depth->data.i32; diff --git a/tensorflow/lite/kernels/padding.h b/tensorflow/lite/kernels/padding.h index 1116b1da852cf6..6b4ab7fa58d1aa 100644 --- a/tensorflow/lite/kernels/padding.h +++ b/tensorflow/lite/kernels/padding.h @@ -44,6 +44,11 @@ inline int ComputePaddingWithOffset(int stride, int dilation_rate, int in_size, inline int ComputeOutSize(TfLitePadding padding, int image_size, int filter_size, int stride, int dilation_rate = 1) { int effective_filter_size = (filter_size - 1) * dilation_rate + 1; + + // TODO(b/186448822): This uses 0 since the function has no other way to + // report error case + if (stride == 0) return 0; + switch (padding) { case kTfLitePaddingSame: return (image_size + stride - 1) / stride; diff --git a/tensorflow/lite/kernels/pooling.cc b/tensorflow/lite/kernels/pooling.cc index 1ae3d207b135ef..d54bd89b221511 100644 --- a/tensorflow/lite/kernels/pooling.cc +++ b/tensorflow/lite/kernels/pooling.cc @@ -87,6 +87,10 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { auto padding = params->padding; int out_width, out_height; + // Prevent division by 0 in optimized pooling implementations + TF_LITE_ENSURE(context, params->stride_height > 0); + TF_LITE_ENSURE(context, params->stride_width > 0); + data->padding = ComputePaddingHeightWidth( params->stride_height, params->stride_width, 1, 1, height, width, params->filter_height, params->filter_width, padding, &out_height, @@ -113,117 +117,126 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { } template -void AverageEvalFloat(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { +TfLiteStatus AverageEvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + const TfLiteTensor* input, TfLiteTensor* output) { float activation_min, activation_max; CalculateActivationRange(params->activation, &activation_min, &activation_max); -#define TF_LITE_AVERAGE_POOL(type) \ - tflite::PoolParams op_params; \ - op_params.stride_height = params->stride_height; \ - op_params.stride_width = params->stride_width; \ - op_params.filter_height = params->filter_height; \ - op_params.filter_width = params->filter_width; \ - op_params.padding_values.height = data->padding.height; \ - op_params.padding_values.width = data->padding.width; \ - op_params.float_activation_min = activation_min; \ - op_params.float_activation_max = activation_max; \ - type::AveragePool(op_params, GetTensorShape(input), \ - GetTensorData(input), GetTensorShape(output), \ - GetTensorData(output)) +#define TF_LITE_AVERAGE_POOL(type) \ + tflite::PoolParams op_params; \ + op_params.stride_height = params->stride_height; \ + op_params.stride_width = params->stride_width; \ + op_params.filter_height = params->filter_height; \ + op_params.filter_width = params->filter_width; \ + op_params.padding_values.height = data->padding.height; \ + op_params.padding_values.width = data->padding.width; \ + op_params.float_activation_min = activation_min; \ + op_params.float_activation_max = activation_max; \ + TF_LITE_ENSURE(context, type::AveragePool(op_params, GetTensorShape(input), \ + GetTensorData(input), \ + GetTensorShape(output), \ + GetTensorData(output))) if (kernel_type == kReference) { TF_LITE_AVERAGE_POOL(reference_ops); } else { TF_LITE_AVERAGE_POOL(optimized_ops); } #undef 
TF_LITE_AVERAGE_POOL + return kTfLiteOk; } template -void AverageEvalQuantizedUint8(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, - TfLiteTensor* output) { +TfLiteStatus AverageEvalQuantizedUint8(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + const TfLiteTensor* input, + TfLiteTensor* output) { int32_t activation_min; int32_t activation_max; (void)CalculateActivationRangeQuantized(context, params->activation, output, &activation_min, &activation_max); -#define TF_LITE_AVERAGE_POOL(type) \ - tflite::PoolParams op_params; \ - op_params.stride_height = params->stride_height; \ - op_params.stride_width = params->stride_width; \ - op_params.filter_height = params->filter_height; \ - op_params.filter_width = params->filter_width; \ - op_params.padding_values.height = data->padding.height; \ - op_params.padding_values.width = data->padding.width; \ - op_params.quantized_activation_min = activation_min; \ - op_params.quantized_activation_max = activation_max; \ - type::AveragePool(op_params, GetTensorShape(input), \ - GetTensorData(input), GetTensorShape(output), \ - GetTensorData(output)) +#define TF_LITE_AVERAGE_POOL(type) \ + tflite::PoolParams op_params; \ + op_params.stride_height = params->stride_height; \ + op_params.stride_width = params->stride_width; \ + op_params.filter_height = params->filter_height; \ + op_params.filter_width = params->filter_width; \ + op_params.padding_values.height = data->padding.height; \ + op_params.padding_values.width = data->padding.width; \ + op_params.quantized_activation_min = activation_min; \ + op_params.quantized_activation_max = activation_max; \ + TF_LITE_ENSURE(context, type::AveragePool(op_params, GetTensorShape(input), \ + GetTensorData(input), \ + GetTensorShape(output), \ + GetTensorData(output))) if (kernel_type == kReference) { TF_LITE_AVERAGE_POOL(reference_ops); } else { TF_LITE_AVERAGE_POOL(optimized_ops); } #undef TF_LITE_AVERAGE_POOL + return kTfLiteOk; } template -void AverageEvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, TfLiteTensor* output) { +TfLiteStatus AverageEvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + const TfLiteTensor* input, + TfLiteTensor* output) { int32_t activation_min; int32_t activation_max; (void)CalculateActivationRangeQuantized(context, params->activation, output, &activation_min, &activation_max); -#define TF_LITE_AVERAGE_POOL(type) \ - tflite::PoolParams op_params; \ - op_params.stride_height = params->stride_height; \ - op_params.stride_width = params->stride_width; \ - op_params.filter_height = params->filter_height; \ - op_params.filter_width = params->filter_width; \ - op_params.padding_values.height = data->padding.height; \ - op_params.padding_values.width = data->padding.width; \ - op_params.quantized_activation_min = activation_min; \ - op_params.quantized_activation_max = activation_max; \ - type::AveragePool(op_params, GetTensorShape(input), \ - GetTensorData(input), GetTensorShape(output), \ - GetTensorData(output)) +#define TF_LITE_AVERAGE_POOL(type) \ + tflite::PoolParams op_params; \ + op_params.stride_height = params->stride_height; \ + op_params.stride_width = params->stride_width; \ + op_params.filter_height = params->filter_height; \ + op_params.filter_width = params->filter_width; \ + op_params.padding_values.height = data->padding.height; \ + 
op_params.padding_values.width = data->padding.width; \ + op_params.quantized_activation_min = activation_min; \ + op_params.quantized_activation_max = activation_max; \ + TF_LITE_ENSURE(context, type::AveragePool(op_params, GetTensorShape(input), \ + GetTensorData(input), \ + GetTensorShape(output), \ + GetTensorData(output))) if (kernel_type == kReference) { TF_LITE_AVERAGE_POOL(reference_integer_ops); } else { TF_LITE_AVERAGE_POOL(optimized_integer_ops); } #undef TF_LITE_AVERAGE_POOL + return kTfLiteOk; } template -void AverageEvalQuantizedInt16(TfLiteContext* context, TfLiteNode* node, - TfLitePoolParams* params, OpData* data, - const TfLiteTensor* input, - TfLiteTensor* output) { +TfLiteStatus AverageEvalQuantizedInt16(TfLiteContext* context, TfLiteNode* node, + TfLitePoolParams* params, OpData* data, + const TfLiteTensor* input, + TfLiteTensor* output) { int32_t activation_min; int32_t activation_max; CalculateActivationRangeQuantized(context, params->activation, output, &activation_min, &activation_max); -#define TF_LITE_AVERAGE_POOL(type) \ - tflite::PoolParams op_params; \ - op_params.stride_height = params->stride_height; \ - op_params.stride_width = params->stride_width; \ - op_params.filter_height = params->filter_height; \ - op_params.filter_width = params->filter_width; \ - op_params.padding_values.height = data->padding.height; \ - op_params.padding_values.width = data->padding.width; \ - op_params.quantized_activation_min = activation_min; \ - op_params.quantized_activation_max = activation_max; \ - type::AveragePool(op_params, GetTensorShape(input), \ - GetTensorData(input), GetTensorShape(output), \ - GetTensorData(output)) +#define TF_LITE_AVERAGE_POOL(type) \ + tflite::PoolParams op_params; \ + op_params.stride_height = params->stride_height; \ + op_params.stride_width = params->stride_width; \ + op_params.filter_height = params->filter_height; \ + op_params.filter_width = params->filter_width; \ + op_params.padding_values.height = data->padding.height; \ + op_params.padding_values.width = data->padding.width; \ + op_params.quantized_activation_min = activation_min; \ + op_params.quantized_activation_max = activation_max; \ + TF_LITE_ENSURE(context, type::AveragePool(op_params, GetTensorShape(input), \ + GetTensorData(input), \ + GetTensorShape(output), \ + GetTensorData(output))) TF_LITE_AVERAGE_POOL(reference_integer_ops); #undef TF_LITE_AVERAGE_POOL + return kTfLiteOk; } template @@ -376,20 +389,17 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input)); switch (input->type) { // Already know in/out types are same. 
case kTfLiteFloat32: - AverageEvalFloat(context, node, params, data, input, output); - break; + return AverageEvalFloat(context, node, params, data, input, + output); case kTfLiteUInt8: - AverageEvalQuantizedUint8(context, node, params, data, input, - output); - break; + return AverageEvalQuantizedUint8(context, node, params, data, + input, output); case kTfLiteInt8: - AverageEvalQuantizedInt8(context, node, params, data, input, - output); - break; + return AverageEvalQuantizedInt8(context, node, params, data, + input, output); case kTfLiteInt16: - AverageEvalQuantizedInt16(context, node, params, data, input, - output); - break; + return AverageEvalQuantizedInt16(context, node, params, data, + input, output); default: TF_LITE_KERNEL_LOG(context, "Type %s not currently supported.", TfLiteTypeGetName(input->type)); diff --git a/tensorflow/lite/kernels/pooling_test.cc b/tensorflow/lite/kernels/pooling_test.cc index e614fedccfd500..108195388141df 100644 --- a/tensorflow/lite/kernels/pooling_test.cc +++ b/tensorflow/lite/kernels/pooling_test.cc @@ -1151,5 +1151,18 @@ TEST(FloatPoolingOpTest, L2PoolPaddingValidSlide1) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({3.5, 6.0, 6.5})); } +#ifdef GTEST_HAS_DEATH_TEST +TEST(FloatPoolingOpTest, MaxPoolWithZeroStride) { + EXPECT_DEATH( + FloatPoolingOpModel m(BuiltinOperator_MAX_POOL_2D, + /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}, + /*filter_width=*/2, /*filter_height=*/2, + /*output=*/{TensorType_FLOAT32, {}}, + /*padding=*/Padding_VALID, + /*stride_w=*/0, /*stride_h=*/0), + "Cannot allocate tensors"); +} +#endif + } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/space_to_batch_nd.cc b/tensorflow/lite/kernels/space_to_batch_nd.cc index 0d537e2d1892fe..af7b9d9e914a1e 100644 --- a/tensorflow/lite/kernels/space_to_batch_nd.cc +++ b/tensorflow/lite/kernels/space_to_batch_nd.cc @@ -79,6 +79,7 @@ TfLiteStatus ResizeOutputTensor(TfLiteContext* context, for (int dim = 0; dim < spatial_dims_num; ++dim) { int final_dim_size = (input_size->data[dim + 1] + paddings_data[dim * 2] + paddings_data[dim * 2 + 1]); + TF_LITE_ENSURE(context, block_shape[dim] != 0); TF_LITE_ENSURE_EQ(context, final_dim_size % block_shape[dim], 0); output_size->data[dim + 1] = final_dim_size / block_shape[dim]; output_batch_size *= block_shape[dim]; diff --git a/tensorflow/lite/kernels/space_to_depth.cc b/tensorflow/lite/kernels/space_to_depth.cc index 4db7440b57e60b..35a0fe1c26e207 100644 --- a/tensorflow/lite/kernels/space_to_depth.cc +++ b/tensorflow/lite/kernels/space_to_depth.cc @@ -61,6 +61,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type); const int block_size = params->block_size; + TF_LITE_ENSURE(context, block_size > 0); const int input_height = input->dims->data[1]; const int input_width = input->dims->data[2]; int output_height = input_height / block_size; diff --git a/tensorflow/lite/kernels/split.cc b/tensorflow/lite/kernels/split.cc index 251a9d893c30a0..95ced1e1c0cd43 100644 --- a/tensorflow/lite/kernels/split.cc +++ b/tensorflow/lite/kernels/split.cc @@ -60,6 +60,7 @@ TfLiteStatus ResizeOutputTensors(TfLiteContext* context, TfLiteNode* node, TF_LITE_ENSURE(context, axis_value < NumDimensions(input)); const int input_size = SizeOfDimension(input, axis_value); + TF_LITE_ENSURE(context, num_splits != 0); TF_LITE_ENSURE_MSG(context, input_size % num_splits == 0, "Not an even split"); const int slice_size = input_size / num_splits; diff --git 
a/tensorflow/lite/kernels/split_v.cc b/tensorflow/lite/kernels/split_v.cc index 054e00572f5a60..ed8a4851c1e97e 100644 --- a/tensorflow/lite/kernels/split_v.cc +++ b/tensorflow/lite/kernels/split_v.cc @@ -96,6 +96,8 @@ TfLiteStatus ResizeOutputTensors(TfLiteContext* context, TfLiteNode* node, } } + TF_LITE_ENSURE(context, axis_value >= 0); + TF_LITE_ENSURE(context, axis_value < NumDimensions(input)); const int input_size = SizeOfDimension(input, axis_value); if (minus_one_index != -1) { diff --git a/tensorflow/lite/kernels/svdf.cc b/tensorflow/lite/kernels/svdf.cc index 8f5c9a86bff5c6..34622dfbd574b1 100644 --- a/tensorflow/lite/kernels/svdf.cc +++ b/tensorflow/lite/kernels/svdf.cc @@ -99,6 +99,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const int rank = params->rank; const int batch_size = input->dims->data[0]; const int num_filters = weights_feature->dims->data[0]; + TF_LITE_ENSURE(context, rank != 0); TF_LITE_ENSURE_EQ(context, num_filters % rank, 0); const int num_units = num_filters / rank; const int memory_size = weights_time->dims->data[1]; @@ -255,14 +256,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_temp_size_array)); // Calculate effective scales. + TF_LITE_ENSURE(context, input->quantization.type != kTfLiteNoQuantization); auto* input_params = reinterpret_cast(input->quantization.params); + TF_LITE_ENSURE(context, + weights_feature->quantization.type != kTfLiteNoQuantization); auto* weights_feature_params = reinterpret_cast( weights_feature->quantization.params); + TF_LITE_ENSURE(context, state->quantization.type != kTfLiteNoQuantization); auto* state_params = reinterpret_cast(state->quantization.params); + TF_LITE_ENSURE(context, + weights_time->quantization.type != kTfLiteNoQuantization); auto* weight_time_params = reinterpret_cast( weights_time->quantization.params); + TF_LITE_ENSURE(context, output->quantization.type != kTfLiteNoQuantization); auto* output_params = reinterpret_cast( output->quantization.params); const double effective_scale_1 = input_params->scale->data[0] * @@ -298,6 +306,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { GetTemporarySafe(context, node, /*index=*/0, &scratch)); TfLiteTensor* state = GetVariableInput(context, node, kStateTensor); + TF_LITE_ENSURE(context, state != nullptr); TfLiteTensor* output; TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, kOutputTensor, &output)); diff --git a/tensorflow/lite/kernels/transpose_conv.cc b/tensorflow/lite/kernels/transpose_conv.cc index 52ee0414dd67c4..ed314a85f50863 100644 --- a/tensorflow/lite/kernels/transpose_conv.cc +++ b/tensorflow/lite/kernels/transpose_conv.cc @@ -590,6 +590,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const auto* params = reinterpret_cast(node->builtin_data); + // Prevent divisions by 0 + TF_LITE_ENSURE(context, params->stride_height > 0); + TF_LITE_ENSURE(context, params->stride_width > 0); + // Resize any deferred dynamic tensors if (IsDynamicTensor(output)) { TF_LITE_ENSURE_OK(context, ResizeTensor(context, output_shape, output)); diff --git a/tensorflow/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/lite/kernels/unidirectional_sequence_lstm.cc index d1a7c6ba9b2f15..2d41af4dcc93ea 100644 --- a/tensorflow/lite/kernels/unidirectional_sequence_lstm.cc +++ b/tensorflow/lite/kernels/unidirectional_sequence_lstm.cc @@ -62,8 +62,12 @@ TfLiteStatus PopulateQuantizedLstmParams8x8_16( context, GetOutputSafe(context, node, lstm::full::kOutputTensor, &output_tensor)); + 
TF_LITE_ENSURE(context, + cell_state->quantization.type != kTfLiteNoQuantization); auto* cell_state_params = static_cast<TfLiteAffineQuantization*>(cell_state->quantization.params); + TF_LITE_ENSURE(context, + output_tensor->quantization.type != kTfLiteNoQuantization); auto* proj_params = static_cast<TfLiteAffineQuantization*>( output_tensor->quantization.params); if (cell_clip > 0.0) { @@ -160,6 +164,8 @@ TfLiteStatus PopulateQuantizedLstmParams8x8_16( TfLiteTensor* intermediate; TF_LITE_ENSURE_OK(context, GetIntermediatesSafe(context, node, i, &intermediate)); + TF_LITE_ENSURE(context, + intermediate->quantization.type != kTfLiteNoQuantization); auto* params = static_cast<TfLiteAffineQuantization*>( intermediate->quantization.params); intermediate_scale.push_back(params->scale->data[0]); @@ -174,6 +180,7 @@ TfLiteStatus PopulateQuantizedLstmParams8x8_16( // is ignored. TfLiteTensor* hidden; TF_LITE_ENSURE_OK(context, GetIntermediatesSafe(context, node, 4, &hidden)); + TF_LITE_ENSURE(context, hidden->quantization.type != kTfLiteNoQuantization); auto* hidden_params = static_cast<TfLiteAffineQuantization*>(hidden->quantization.params); intermediate_scale.push_back(hidden_params->scale->data[0]); @@ -760,6 +767,8 @@ TfLiteStatus PopulatePrecomputedZPTimesWeightsWithBias(TfLiteContext* context, const TfLiteTensor* intermediate = &context->tensors[node->intermediates->data[4]]; + TF_LITE_ENSURE(context, + intermediate->quantization.type != kTfLiteNoQuantization); const auto* params = static_cast<TfLiteAffineQuantization*>(intermediate->quantization.params); const int32_t hidden_zp = params->zero_point->data[0]; diff --git a/tensorflow/lite/model_test.cc b/tensorflow/lite/model_test.cc index 110c54aa571a7e..d962a3b51415e4 100644 --- a/tensorflow/lite/model_test.cc +++ b/tensorflow/lite/model_test.cc @@ -460,6 +460,25 @@ TEST(BasicFlatBufferModel, TestHandleMalformedModelReuseTensor) { ASSERT_NE(interpreter->AllocateTensors(), kTfLiteOk); } +// Recursion and reentrancy are not supported in TFLite. +// The test ensures it fails gracefully instead of crashing with +// a stack overflow. +TEST(BasicFlatBufferModel, TestUnsupportedRecursion) { + const auto model_path = + "tensorflow/lite/testdata/unsupported_recursion.bin"; + + std::unique_ptr<FlatBufferModel> model = + FlatBufferModel::BuildFromFile(model_path); + ASSERT_NE(model, nullptr); + + tflite::ops::builtin::BuiltinOpResolver resolver; + InterpreterBuilder builder(*model, resolver); + std::unique_ptr<Interpreter> interpreter; + ASSERT_EQ(builder(&interpreter), kTfLiteOk); + ASSERT_NE(interpreter, nullptr); + ASSERT_NE(interpreter->AllocateTensors(), kTfLiteOk); +} + // The models here have a buffer index for a tensor pointing to a null buffer. // This results in the tensor being interpreted as read-write, but the model // assumes the tensor is read-only.
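The svdf.cc and unidirectional_sequence_lstm.cc hunks above repeat one idiom: verify quantization.type before casting quantization.params, since a crafted model can carry kTfLiteNoQuantization with a null params pointer. A hypothetical helper expressing that check once (the patch inlines it at each cast site instead):

// Sketch only; the name is illustrative, not part of the patch.
inline const TfLiteAffineQuantization* GetAffineQuantization(
    const TfLiteTensor* tensor) {
  if (tensor->quantization.type == kTfLiteNoQuantization) {
    return nullptr;  // Caller must TF_LITE_ENSURE the result is non-null.
  }
  return reinterpret_cast<const TfLiteAffineQuantization*>(
      tensor->quantization.params);
}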
As such, `interpreter->Invoke()` would diff --git a/tensorflow/lite/testdata/unsupported_recursion.bin b/tensorflow/lite/testdata/unsupported_recursion.bin new file mode 100644 index 00000000000000..525c5383ab4ef6 Binary files /dev/null and b/tensorflow/lite/testdata/unsupported_recursion.bin differ diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD index e0ed58b4662f81..ba6522de133819 100644 --- a/tensorflow/python/data/experimental/kernel_tests/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/BUILD @@ -312,6 +312,7 @@ tf_py_test( tf_py_test( name = "io_test", srcs = ["io_test.py"], + tags = ["no_pip"], deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:util", diff --git a/tensorflow/python/data/experimental/kernel_tests/io_test.py b/tensorflow/python/data/experimental/kernel_tests/io_test.py index 7cbb745e0997db..db331f7325ab4f 100644 --- a/tensorflow/python/data/experimental/kernel_tests/io_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/io_test.py @@ -17,6 +17,7 @@ from __future__ import division from __future__ import print_function +import numpy as np import os import shutil @@ -99,6 +100,20 @@ def save_fn(): self._test_dir, dataset.element_spec, compression=compression) self.assertDatasetProduces(dataset, range(42)) + @combinations.generate(test_base.eager_only_combinations()) + def testRepeatAndPrefetch(self): + """This test reproduces github.com/tensorflow/tensorflow/issues/49165""" + dataset1 = dataset_ops.Dataset.from_tensor_slices(np.random.rand(16, 32)) + io.save(dataset1, self._test_dir) + dataset = io.load(self._test_dir) + dataset = dataset.shuffle(buffer_size=16) + dataset = dataset.batch(16) + dataset = dataset.repeat() + dataset = dataset.prefetch(1) + next_element = self.getNext(dataset) + for _ in range(30): + self.evaluate(next_element()) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/experimental/kernel_tests/snapshot_test.py b/tensorflow/python/data/experimental/kernel_tests/snapshot_test.py index b1fa780f6b3c80..c1e3b1e9fa0b83 100644 --- a/tensorflow/python/data/experimental/kernel_tests/snapshot_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/snapshot_test.py @@ -356,6 +356,19 @@ def make_dataset(): num_runs_per_fingerprint=1, num_snapshot_shards_per_run=multiprocessing.cpu_count()) + @combinations.generate(test_base.default_test_combinations()) + def testRepeatAndPrefetch(self): + """This test reproduces github.com/tensorflow/tensorflow/issues/48903.""" + dataset = dataset_ops.Dataset.from_tensor_slices(np.random.rand(16, 32)) + dataset = dataset.apply(snapshot.snapshot(self._snapshot_dir)) + dataset = dataset.shuffle(buffer_size=16) + dataset = dataset.batch(16) + dataset = dataset.repeat() + dataset = dataset.prefetch(1) + next_element = self.getNext(dataset) + for _ in range(30): + self.evaluate(next_element()) + class LegacySnapshotDatasetTest( reader_dataset_ops_test_base.TFRecordDatasetTestBase, diff --git a/tensorflow/python/data/kernel_tests/from_sparse_tensor_slices_test.py b/tensorflow/python/data/kernel_tests/from_sparse_tensor_slices_test.py index d7a2c158de9a13..444b83cb72e5d4 100644 --- a/tensorflow/python/data/kernel_tests/from_sparse_tensor_slices_test.py +++ b/tensorflow/python/data/kernel_tests/from_sparse_tensor_slices_test.py @@ -84,6 +84,26 @@ def testFromSparseTensorSlices(self): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + 
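+ # The test below feeds a structurally inconsistent SparseTensorValue (empty + # indices paired with non-empty values) and expects dataset initialization + # to fail with InvalidArgumentError rather than crash.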
@combinations.generate(combinations.combine(tf_api_version=1, mode=["graph"])) + def testEmptySparseTensorSlicesInvalid(self): + """Test a dataset based on invalid `tf.sparse.SparseTensor`.""" + st = array_ops.sparse_placeholder(dtypes.float64) + iterator = dataset_ops.make_initializable_iterator( + dataset_ops.Dataset.from_sparse_tensor_slices(st)) + init_op = iterator.initializer + + with self.cached_session() as sess: + # Test with an empty sparse tensor but with non-empty values. + empty_indices = np.empty((0, 4), dtype=np.int64) + non_empty_values = [1, 2, 3, 4] + empty_dense_shape = [0, 4, 37, 9] + sparse_feed = sparse_tensor.SparseTensorValue(empty_indices, + non_empty_values, + empty_dense_shape) + # Here, we expect the test to fail when running the feed. + with self.assertRaises(errors.InvalidArgumentError): + sess.run(init_op, feed_dict={st: sparse_feed}) + @combinations.generate(combinations.combine(tf_api_version=2, mode=["eager"])) def testFromSparseTensorSlicesError(self): with self.assertRaises(AttributeError): diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 5cc859aa033d84..9120d12afb016e 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -531,7 +531,7 @@ def embedding_matmul(a, b): ValueError: if `input_signature` is not None and the `python_function`'s argspec has keyword arguments. """ - self._lock = threading.Lock() + self._lock = threading.RLock() self._python_function = python_function self._function_spec = function_lib.FunctionSpec.from_function_and_signature( python_function, @@ -571,7 +571,7 @@ def __getstate__(self): def __setstate__(self, state): """Restore from pickled state.""" self.__dict__ = state - self._lock = threading.Lock() + self._lock = threading.RLock() self._descriptor_cache = weakref.WeakKeyDictionary() self._key_for_call_stats = self._get_key_for_call_stats() diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py index 42af94c6cb1068..caae28efc6a1b3 100644 --- a/tensorflow/python/eager/def_function_test.py +++ b/tensorflow/python/eager/def_function_test.py @@ -28,6 +28,7 @@ from six.moves import range from tensorflow.python.autograph.core import converter +from tensorflow.python.eager import backprop from tensorflow.python.eager import def_function from tensorflow.python.eager import lift_to_graph from tensorflow.python.framework import constant_op @@ -38,6 +39,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.module import module from tensorflow.python.ops import array_ops +from tensorflow.python.ops import cond_v2 from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops @@ -984,6 +986,117 @@ def testDouble(self, a): self.assertAllEqual(obj2.testDouble.experimental_get_tracing_count(), 3) self.assertAllEqual(obj1.testDouble.experimental_get_tracing_count(), 2) + def test_recursive_tf_function(self): + + @def_function.function + def recursive_fn(n): + if n > 0: + return recursive_fn(n - 1) + return 1 + + self.assertEqual(recursive_fn(5).numpy(), 1) + + def test_recursive_tf_function_with_gradients(self): + + @def_function.function + def recursive_fn(n, x): + if n > 0: + return n * recursive_fn(n - 1, x) + else: + return x + + x = variables.Variable(1.0) + with backprop.GradientTape() as tape: + g = recursive_fn(5, x) + + dg_dx = tape.gradient(g, x) + self.assertEqual(dg_dx.numpy(), 120) +
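+ # The tests below repeat the exercise with plain-Python recursion inside a + # tf.function, mutual recursion between two tf.functions, and a cond_v2 body; + # all of them rely on the re-entrant lock introduced above.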
def test_recursive_python_function(self): + + def recursive_py_fn(n): + if n > 0: + return recursive_py_fn(n - 1) + return 1 + + @def_function.function + def recursive_fn(n): + return recursive_py_fn(n) + + self.assertEqual(recursive_fn(5).numpy(), 1) + + def test_recursive_python_function_with_gradients(self): + + def recursive_py_fn(n, x): + if n > 0: + return n * recursive_py_fn(n - 1, x) + return x + + @def_function.function + def recursive_fn(n, x): + return recursive_py_fn(n, x) + + x = variables.Variable(1.0) + with backprop.GradientTape() as tape: + g = recursive_fn(5, x) + + dg_dx = tape.gradient(g, x) + self.assertEqual(dg_dx.numpy(), 120) + + def test_recursive_tf_function_call_each_other(self): + + @def_function.function + def recursive_fn1(n): + if n <= 1: + return 1 + return recursive_fn2(n - 1) + + @def_function.function + def recursive_fn2(n): + if n <= 1: + return 2 + return recursive_fn1(n - 1) + + self.assertEqual(recursive_fn1(5).numpy(), 1) + self.assertEqual(recursive_fn1(6).numpy(), 2) + self.assertEqual(recursive_fn2(5).numpy(), 2) + self.assertEqual(recursive_fn2(6).numpy(), 1) + + def test_recursive_tf_function_call_each_other_with_gradients(self): + + @def_function.function + def recursive_fn1(n, x): + if n <= 1: + return x + return n * recursive_fn2(n - 1, x) + + @def_function.function + def recursive_fn2(n, x): + if n <= 1: + return 2 * x + return n * recursive_fn1(n - 1, x) + + x = variables.Variable(1.0) + with backprop.GradientTape() as tape: + g1 = recursive_fn1(5, x) + + dg1_dx = tape.gradient(g1, x) + self.assertEqual(dg1_dx.numpy(), 120) + + with backprop.GradientTape() as tape: + g2 = recursive_fn2(5, x) + + dg2_dx = tape.gradient(g2, x) + self.assertEqual(dg2_dx.numpy(), 240) + + def test_recursive_tf_function_with_cond(self): + @def_function.function(autograph=False) + def recursive_fn(n): + return cond_v2.cond_v2(n > 0, recursive_fn(n - 1), 1) + + with self.assertRaises(RecursionError): + recursive_fn(constant_op.constant(5)) + if __name__ == '__main__': ops.enable_eager_execution() diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 9d9cf0b50c380a..67e6fa4328ee47 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2926,7 +2926,7 @@ def __init__(self, self._hashable_input_signature = _make_input_signature_hashable( self.flat_input_signature) - self._lock = threading.Lock() + self._lock = threading.RLock() # _descriptor_cache is a cache mapping instances of a class to an instance-specific # `Function`, used to make sure defun-decorated methods create different # functions for each instance.
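The `threading.Lock` to `threading.RLock` switch above (made in both `def_function.py` and `function.py`) is what the new recursion tests exercise: tracing a `tf.function` runs the Python body while the function's lock is held, so a recursive call re-enters that lock on the same thread. A minimal standard-library sketch of the difference, independent of TensorFlow (the names here are illustrative, not from the patch):

```python
import threading

# With threading.Lock(), the recursive call below would block forever: the
# same thread tries to re-acquire a lock it already holds. threading.RLock()
# keeps a per-thread ownership count, so re-entry succeeds and the recursion
# unwinds normally.
lock = threading.RLock()

def trace(n):
  # Stands in for the tracing path, which holds self._lock while running the
  # Python body; a recursive call re-enters here on the same thread.
  with lock:
    return trace(n - 1) if n > 0 else 1

print(trace(5))  # prints 1; with a plain Lock this call would deadlock
```

With the re-entrant lock, unbounded recursion now surfaces as Python's `RecursionError` (as `test_recursive_tf_function_with_cond` asserts) instead of hanging the process.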
diff --git a/tensorflow/python/keras/engine/functional.py b/tensorflow/python/keras/engine/functional.py index 57ae9ee92a764d..9d446b85f6bf38 100644 --- a/tensorflow/python/keras/engine/functional.py +++ b/tensorflow/python/keras/engine/functional.py @@ -58,7 +58,7 @@ class Functional(training_lib.Model): than with subclassed `Model`s, specifically: - Model cloning (`keras.models.clone`) - - Serialization (`model.get_config()/from_config`, `model.to_json()/to_yaml()` + - Serialization (`model.get_config()/from_config`, `model.to_json()`) - Whole-model saving (`model.save()`) A `Functional` model can be instantiated by passing two arguments to diff --git a/tensorflow/python/keras/engine/functional_test.py b/tensorflow/python/keras/engine/functional_test.py index 8427517f235413..aeff65e2ad41e9 100644 --- a/tensorflow/python/keras/engine/functional_test.py +++ b/tensorflow/python/keras/engine/functional_test.py @@ -52,11 +52,6 @@ from tensorflow.python.platform import test from tensorflow.python.training.tracking.util import Checkpoint -try: - import yaml # pylint:disable=g-import-not-at-top -except ImportError: - yaml = None - class NetworkConstructionTest(keras_parameterized.TestCase): @@ -632,10 +627,6 @@ def test_multi_input_multi_output_recursion(self): json_str = model.to_json() models.model_from_json(json_str) - if yaml is not None: - yaml_str = model.to_yaml() - models.model_from_yaml(yaml_str) - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_invalid_graphs(self): a = layers.Input(shape=(32,), name='input_a') @@ -1391,10 +1382,6 @@ def test_constant_initializer_with_numpy(self): json_str = model.to_json() models.model_from_json(json_str) - if yaml is not None: - yaml_str = model.to_yaml() - models.model_from_yaml(yaml_str) - def test_subclassed_error_if_init_not_called(self): class MyNetwork(training_lib.Model): diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index e0d53deebeec99..6e9c6f4885358b 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -91,11 +91,6 @@ import h5py except ImportError: h5py = None - -try: - import yaml -except ImportError: - yaml = None # pylint: enable=g-import-not-at-top @@ -2281,6 +2276,9 @@ def to_json(self, **kwargs): def to_yaml(self, **kwargs): """Returns a yaml string containing the network configuration. + Note: Since TF 2.6, this method is no longer supported and will raise a + RuntimeError. + To load a network from a yaml save file, use `keras.models.model_from_yaml(yaml_string, custom_objects={})`. @@ -2296,12 +2294,12 @@ def to_yaml(self, **kwargs): A YAML string. Raises: - ImportError: if yaml module is not found. + RuntimeError: raised unconditionally, because the method poses a security risk """ - if yaml is None: - raise ImportError( - 'Requires yaml module installed (`pip install pyyaml`).') - return yaml.dump(self._updated_config(), **kwargs) + raise RuntimeError( + 'Method `model.to_yaml()` has been removed due to the security risk of ' + 'arbitrary code execution. Please use `model.to_json()` instead.'
+ ) def reset_states(self): for layer in self.layers: diff --git a/tensorflow/python/keras/mixed_precision/autocast_variable.py b/tensorflow/python/keras/mixed_precision/autocast_variable.py index 3cacee0cb82cea..9538714fc3d637 100644 --- a/tensorflow/python/keras/mixed_precision/autocast_variable.py +++ b/tensorflow/python/keras/mixed_precision/autocast_variable.py @@ -57,12 +57,11 @@ class AutoCastVariable(variables.Variable, core.Tensor): called. """ - def __init__(self, variable, op=None): + def __init__(self, variable): """Creates an AutoCastVariable instance. Args: variable: A floating-point resource variable to wrap. - op: Optional operation of this variable. Raises: ValueError: If `variable` is not a floating-point resource variable @@ -74,7 +73,11 @@ def __init__(self, variable, op=None): raise ValueError('variable must be a floating point variable but has ' 'type: %s' % variable.dtype.name) self._variable = variable - self._op = op + # 'delegate' means AutoCastVariable.op returns self._variable.op, which will + # raise an AttributeError in Eager (as intended). If set to any other value, + # AutoCastVariable.op returns that value instead, which is used to set the + # op attribute in AutoCastVariable.assign(). + self._op = 'delegate' def _should_cast(self): """Returns True if this variable should be casted when accessed.""" @@ -199,10 +202,18 @@ def _apply_assign_update(self, use_locking=None, name=None, read_value=True): + # TODO(b/146181571): This logic can be simplified once + # DistributedVariable.assign returns a DistributedVariable. Currently for + # MirroredStrategy, it returns a Mirrored value. if ops.executing_eagerly_outside_functions(): assign_op = update_fn(value, use_locking, name, False) if read_value: - return create_autocast_variable(self._variable, op=assign_op) + # We create a new AutoCastVariable with the same underlying tf.Variable. + # The new AutoCastVariable is identical except the 'op' attribute is + # defined. This matches the behavior of tf.Variable.assign. + var = create_autocast_variable(self._variable) + var._op = assign_op # pylint:disable=protected-access + return var return assign_op # Fallback to wrapping the returned variable in graph mode if possible @@ -298,9 +309,9 @@ def device(self): @property def op(self): - if self._op is not None: - return self._op - return self._variable.op + if self._op == 'delegate': + return self._variable.op + return self._op def _as_graph_element(self): graph_element = self._variable._as_graph_element() # pylint:disable=protected-access @@ -469,7 +480,7 @@ def __rmatmul__(self, o): AutoCastVariable._dense_var_to_tensor) # pylint:disable=protected-access -def create_autocast_variable(variable, op=None): +def create_autocast_variable(variable): """Creates an AutoCastVariable that wraps another variable. This typically just returns `AutoCastVariable(variable)`. But, if the variable @@ -481,14 +492,13 @@ def create_autocast_variable(variable, op=None): Args: variable: A floating-point resource variable to wrap. - op: Optional operation of this variable. Returns: An AutoCastVariable that wraps the variable. """ if not isinstance(variable, (distribute_values.DistributedVariable, ps_distribute_values.AggregatingVariable)): - return AutoCastVariable(variable, op=op) + return AutoCastVariable(variable) class AutoCastDistributedVariable(AutoCastVariable, variable.__class__): """An AutoCastVariable that also subclasses from variable.__class__.
@@ -511,7 +521,7 @@ def __repr__(self): ).format(v=self) # pylint: enable=missing-format-attribute - return AutoCastDistributedVariable(variable, op=op) + return AutoCastDistributedVariable(variable) class enable_auto_cast_variables(object): # pylint:disable=invalid-name diff --git a/tensorflow/python/keras/mixed_precision/autocast_variable_test.py b/tensorflow/python/keras/mixed_precision/autocast_variable_test.py index c21ff8652054a5..6d70aaaca273e7 100644 --- a/tensorflow/python/keras/mixed_precision/autocast_variable_test.py +++ b/tensorflow/python/keras/mixed_precision/autocast_variable_test.py @@ -37,7 +37,14 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import test_combinations as combinations from tensorflow.python.keras.mixed_precision import autocast_variable +from tensorflow.python.keras.optimizer_v2 import adadelta +from tensorflow.python.keras.optimizer_v2 import adagrad +from tensorflow.python.keras.optimizer_v2 import adam +from tensorflow.python.keras.optimizer_v2 import adamax +from tensorflow.python.keras.optimizer_v2 import ftrl from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_v2 +from tensorflow.python.keras.optimizer_v2 import nadam +from tensorflow.python.keras.optimizer_v2 import rmsprop from tensorflow.python.ops import array_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variables @@ -352,11 +359,28 @@ def run_assign(): self.assertAllClose(5., self.evaluate(run_assign())) @ds_combinations.generate(maybe_distribute) - def test_assign_op(self, distribution): + def test_op_attribute(self, distribution): with distribution.scope(): x = get_var(0., dtypes.float32) x = autocast_variable.create_autocast_variable(x) + # Variable.op raises an AttributeError in Eager mode and is an op in graph + # mode. Variable.assign(...).op is None in Eager mode and an op in Graph + # mode or a tf.function. We test this is also true of AutoCastVariable. + if context.executing_eagerly(): + with self.assertRaisesRegex( + AttributeError, + 'Tensor.op is meaningless when eager execution is enabled'): + x.op # pylint: disable=pointless-statement + self.assertIsNone(x.assign(1.0).op) + self.assertIsNone(x.assign_add(1.0).op) + self.assertIsNone(x.assign_sub(1.0).op) + else: + self.assertIsNotNone(x.op) + self.assertIsNotNone(x.assign(1.0).op) + self.assertIsNotNone(x.assign_add(1.0).op) + self.assertIsNotNone(x.assign_sub(1.0).op) + @def_function.function def func(): self.assertIsNotNone(x.assign(1.0).op) @@ -503,25 +527,51 @@ def test_repr_distributed(self): 'dtype_to_cast_to=float32 ' 'inner_variable=MirroredVariable.*>') - @parameterized.named_parameters( - ('v1', gradient_descent_v1.GradientDescentOptimizer), - ('v2', gradient_descent_v2.SGD)) - def test_optimizer(self, optimizer_class): + @ds_combinations.generate(combinations.combine( + optimizer_class=[ + adadelta.Adadelta, + adagrad.Adagrad, + adam.Adam, + adamax.Adamax, + ftrl.Ftrl, + gradient_descent_v2.SGD, + nadam.Nadam, + rmsprop.RMSprop, + gradient_descent_v1.GradientDescentOptimizer + ], + use_tf_function=[False, True])) + def test_optimizer(self, optimizer_class, use_tf_function): + if use_tf_function and not context.executing_eagerly(): + self.skipTest('Test does not support graph mode with tf.function') x = get_var(1., dtypes.float32) x = autocast_variable.create_autocast_variable(x) - opt = optimizer_class(1.) + y = get_var(1., dtypes.float32) + opt = optimizer_class(learning_rate=1.) 
- @def_function.function def f(): - opt.minimize(lambda: x + 1., var_list=[x]) + # Minimize both the AutoCastVariable and the normal tf.Variable. Both + # variables should be updated to the same value. + op = opt.minimize(lambda: x + y, var_list=[x, y]) + return None if ops.executing_eagerly_outside_functions() else op + + if use_tf_function: + f = def_function.function(f) if context.executing_eagerly(): f() else: - op = f() # pylint: disable=assignment-from-no-return + op = f() self.evaluate(variables.global_variables_initializer()) self.evaluate(op) - self.assertEqual(self.evaluate(x), 0) + # Assert the AutoCastVariable has changed from its initial value + self.assertNotEqual(self.evaluate(x), 1.) + # Assert AutoCastVariable is updated correctly by comparing it to the normal + # variable + self.assertAlmostEqual(self.evaluate(x), self.evaluate(y)) + if optimizer_class in (gradient_descent_v2.SGD, + gradient_descent_v1.GradientDescentOptimizer): + # With SGD, the variable decreases by exactly 1 + self.assertEqual(self.evaluate(x), 0) if __name__ == '__main__': diff --git a/tensorflow/python/keras/saving/model_config.py b/tensorflow/python/keras/saving/model_config.py index facc95b22f905b..b4a98e98709f36 100644 --- a/tensorflow/python/keras/saving/model_config.py +++ b/tensorflow/python/keras/saving/model_config.py @@ -23,18 +23,11 @@ from tensorflow.python.util.tf_export import keras_export -# pylint: disable=g-import-not-at-top -try: - import yaml -except ImportError: - yaml = None -# pylint: enable=g-import-not-at-top - @keras_export('keras.models.model_from_config') def model_from_config(config, custom_objects=None): """Instantiates a Keras model from its config. - + Usage: ``` # for a Functional API model @@ -68,17 +61,8 @@ def model_from_config(config, custom_objects=None): def model_from_yaml(yaml_string, custom_objects=None): """Parses a yaml model configuration file and returns a model instance. - Usage: - - >>> model = tf.keras.Sequential([ - ... tf.keras.layers.Dense(5, input_shape=(3,)), - ... tf.keras.layers.Softmax()]) - >>> try: - ... import yaml - ... config = model.to_yaml() - ... loaded_model = tf.keras.models.model_from_yaml(config) - ... except ImportError: - ... pass + Note: Since TF 2.6, this method is no longer supported and will raise a + RuntimeError. Arguments: yaml_string: YAML string or open file encoding a model configuration. @@ -90,19 +74,13 @@ def model_from_yaml(yaml_string, custom_objects=None): A Keras model instance (uncompiled). Raises: - ImportError: if yaml module is not found. + RuntimeError: raised unconditionally, because the method poses a security risk """ - if yaml is None: - raise ImportError('Requires yaml module installed (`pip install pyyaml`).') - # The method unsafe_load only exists in PyYAML 5.x+, so which branch of the - # try block is covered by tests depends on the installed version of PyYAML. - try: - # PyYAML 5.x+ - config = yaml.unsafe_load(yaml_string) - except AttributeError: - config = yaml.load(yaml_string) - from tensorflow.python.keras.layers import deserialize # pylint: disable=g-import-not-at-top - return deserialize(config, custom_objects=custom_objects) + raise RuntimeError( + 'Method `model_from_yaml()` has been removed due to the security risk of ' + 'arbitrary code execution. Please use `Model.to_json()` and ' + '`model_from_json()` instead.'
+ ) @keras_export('keras.models.model_from_json') diff --git a/tensorflow/python/keras/saving/saved_model/load.py b/tensorflow/python/keras/saving/saved_model/load.py index cb6d340ea0356d..efcdeb0620ee85 100644 --- a/tensorflow/python/keras/saving/saved_model/load.py +++ b/tensorflow/python/keras/saving/saved_model/load.py @@ -135,7 +135,7 @@ def load(path, compile=True, options=None): # pylint: disable=redefined-builtin # Recreate layers and metrics using the info stored in the metadata. keras_loader = KerasObjectLoader(metadata, object_graph_def) - keras_loader.load_layers() + keras_loader.load_layers(compile=compile) # Generate a dictionary of all loaded nodes. nodes_to_load = {'root': None} @@ -360,7 +360,7 @@ def _add_children_recreated_from_config(self, obj, proto, node_id): obj_child, child_proto, child_id) self.loaded_nodes[child_id] = obj_child, setter - def load_layers(self): + def load_layers(self, compile=True): # pylint: disable=redefined-builtin """Load all layer nodes from the metadata.""" # Load metrics after models and layers, since it's likely that models # and layers will create the metric when initialized (this avoids wasting @@ -376,9 +376,21 @@ def load_layers(self): node_metadata.metadata) for node_metadata in metric_list: - self.loaded_nodes[node_metadata.node_id] = self._load_layer( - node_metadata.node_id, node_metadata.identifier, - node_metadata.metadata) + try: + self.loaded_nodes[node_metadata.node_id] = self._load_layer( + node_metadata.node_id, node_metadata.identifier, + node_metadata.metadata) + except ValueError: + # Metrics are only needed when the model is compiled later. We ignore + # errors when trying to load custom metrics when `compile=False` until + # custom metrics are serialized properly (b/135550038). + if compile: + raise + logging.warning('Unable to restore custom metric. Please ensure that ' + 'the layer implements `get_config` and `from_config` ' + 'when saving. 
In addition, please use the ' + '`custom_objects` arg when calling `load_model()`.') + def _load_layer(self, node_id, identifier, metadata): """Load a single layer from a SavedUserObject proto.""" diff --git a/tensorflow/python/keras/saving/saved_model/saved_model_test.py b/tensorflow/python/keras/saving/saved_model/saved_model_test.py index 726ef570da8be0..d685a649f5b297 100644 --- a/tensorflow/python/keras/saving/saved_model/saved_model_test.py +++ b/tensorflow/python/keras/saving/saved_model/saved_model_test.py @@ -1147,6 +1147,26 @@ def update_state(self, value): self._test_metric_save_and_load( metric, self._save_model_dir(), 1, test_sample_weight=False) + @keras_parameterized.run_with_all_model_types + def test_custom_metric_model(self): + + class CustomMetric(keras.metrics.MeanSquaredError): + pass + + model = testing_utils.get_small_mlp(1, 4, input_dim=3) + model.compile( + loss='mse', + optimizer='rmsprop', + metrics=[CustomMetric()]) + + saved_model_dir = self._save_model_dir() + tf_save.save(model, saved_model_dir) + with self.assertRaisesRegex(ValueError, 'metric'): + keras_load.load(saved_model_dir) + + keras_load.load(saved_model_dir, compile=False) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 43f19c26639154..985fef3995500f 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2388,6 +2388,7 @@ cuda_py_test( name = "stack_op_test", size = "small", srcs = ["stack_op_test.py"], + tags = ["no_oss"], tfrt_enabled = True, deps = [ "//tensorflow/python:array_ops", diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 4106ea9b166dec..392f01e2464de9 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -1528,7 +1528,7 @@ def testUnravelIndexZeroDim(self): with self.cached_session(): for dtype in [dtypes.int32, dtypes.int64]: with self.assertRaisesRegex(errors.InvalidArgumentError, - "index is out of bound as with dims"): + "dims cannot contain a dim of zero"): indices = constant_op.constant([2, 5, 7], dtype=dtype) dims = constant_op.constant([3, 0], dtype=dtype) self.evaluate(array_ops.unravel_index(indices=indices, dims=dims)) diff --git a/tensorflow/python/kernel_tests/attention_ops_test.py b/tensorflow/python/kernel_tests/attention_ops_test.py index 804a0b20cc9dd4..fd83f4f6510c63 100644 --- a/tensorflow/python/kernel_tests/attention_ops_test.py +++ b/tensorflow/python/kernel_tests/attention_ops_test.py @@ -22,6 +22,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_image_ops from tensorflow.python.ops import image_ops @@ -301,6 +302,18 @@ def testGlimpseNonNormalizedNonCentered(self): np.asarray([[5, 6, 7], [10, 11, 12], [15, 16, 17]]), self.evaluate(result2)[0, :, :, 0]) + def testGlimpseNegativeInput(self): + img = np.arange(9).reshape([1, 3, 3, 1]) + with self.test_session(): + with self.assertRaises((errors.InternalError, ValueError)): + result = image_ops.extract_glimpse_v2( + img, + size=[1023, -63], + offsets=[1023, 63], + centered=False, + normalized=False) + self.evaluate(result) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py 
b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py index 73098ed3084da6..2af570da73f815 100644 --- a/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py +++ b/tensorflow/python/kernel_tests/boosted_trees/stats_ops_test.py @@ -21,9 +21,11 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import boosted_trees_ops +from tensorflow.python.ops import gen_boosted_trees_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import googletest @@ -1669,6 +1671,199 @@ def testMakeStatsSummaryNumericalPrecisionMegaBatch(self): """Tests numeric precision.""" self._verify_precision(length=50000000) + def testBoostedTreesCalculateBestGainsPerFeatureSecurity(self): + node_id_range = [1, 2] + stats_summary_list = [[[[]]]] + l1 = [1.0] + l2 = [1.0] + tree_complexity = [1.0] + min_node_weight = [1.17] + max_splits = 1 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_gains_per_feature( + node_id_range=node_id_range, + stats_summary_list=stats_summary_list, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + max_splits=max_splits) + + def testBoostedTreesCalculateBestFeatureSplitSecurity(self): + node_id_range = [1, 2] + stats_summary = [[[[]]]] + split_type = 'equality' + l1 = [1.0] + l2 = [1.0] + tree_complexity = [1.0] + min_node_weight = [1.17] + logits_dimension = 5 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split( + node_id_range=node_id_range, + stats_summary=stats_summary, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + logits_dimension=logits_dimension, + split_type=split_type) + + def testBoostedTreesCalculateBestFeatureSplitSecurity2(self): + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split( + node_id_range=[0, 8], + stats_summary=[[[[1.0], [2.0], [3.0]]]], + l1=[0.5], + l2=[0.5], + tree_complexity=[0.1], + min_node_weight=[1.0], + logits_dimension=8) + + def testBoostedTreesCalculateBestFeatureSplitV2Security(self): + node_id_range = [1, 2] + stats_summaries_list = [[[[[]]]]] + split_types = ['inequality'] + candidate_feature_ids = [1, 2, 3, 4] + l1 = [1.0] + l2 = [1.0] + tree_complexity = [1.0] + min_node_weight = [1.17] + logits_dimension = 5 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split_v2( + node_id_range=node_id_range, + stats_summaries_list=stats_summaries_list, + split_types=split_types, + candidate_feature_ids=candidate_feature_ids, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + logits_dimension=logits_dimension) + + def testBoostedTreesSparseCalculateBestFeatureSplitSecurity(self): + node_id_range = [] + stats_summary_indices = [[]] + stats_summary_values = [1.0] + stats_summary_shape = [1, 1, 1, 1] + l1 = [1.0] + l2 = [1.0] + tree_complexity = [0.5] + min_node_weight = [1.0] + logits_dimension = 3 + split_type = 'inequality' + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split( + 
node_id_range=node_id_range, + stats_summary_indices=stats_summary_indices, + stats_summary_values=stats_summary_values, + stats_summary_shape=stats_summary_shape, + l1=l1, + l2=l2, + tree_complexity=tree_complexity, + min_node_weight=min_node_weight, + logits_dimension=logits_dimension, + split_type=split_type) + + def testBoostedTreesSparseCalculateBestFeatureSplitSecurity2(self): + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split( + node_id_range=[0, 1], + stats_summary_indices=[[0, -1, -1, -1], [1, 0, -1, 0], [1, 0, 0, -1]], + stats_summary_values=[0.1, 0.2, 0.3], + stats_summary_shape=[1, 1, 1, 1], + l1=[0.5], + l2=[0.5], + tree_complexity=[0.1], + min_node_weight=[1.0], + logits_dimension=1) + + def testBoostedTreesMakeStatsSummarySecurity(self): + node_ids = [1, 2] + gradients = [[]] + hessians = [[0.2], [0.1]] + bucketized_features_list = [[1], [2]] + max_splits = 3 + num_buckets = 3 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_make_stats_summary( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + bucketized_features_list=bucketized_features_list, + max_splits=max_splits, + num_buckets=num_buckets) + + def testBoostedTreesMakeStatsSummarySecurity2(self): + node_ids = [1, 2, 3] + gradients = [[0.1], [0.2]] + hessians = [[0.2], [0.1]] + bucketized_features_list = [[1], [2]] + max_splits = 3 + num_buckets = 3 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_make_stats_summary( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + bucketized_features_list=bucketized_features_list, + max_splits=max_splits, + num_buckets=num_buckets) + + def testBoostedTreesAggregateStatsSecurity(self): + node_ids = [1, 2] + gradients = [[]] + hessians = [[100.0]] + feature = [[0, 0, 0]] + max_splits = 100 + num_buckets = 100 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_aggregate_stats( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + feature=feature, + max_splits=max_splits, + num_buckets=num_buckets) + + def testBoostedTreesAggregateStatsSecurity2(self): + node_ids = [-10] + gradients = [[0.0, 0.0]] + hessians = [[100.0]] + feature = [[0, 0, 0]] + max_splits = 100 + num_buckets = 100 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + self.evaluate( + gen_boosted_trees_ops.boosted_trees_aggregate_stats( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + feature=feature, + max_splits=max_splits, + num_buckets=num_buckets)) + + def testBoostedTreesSparseAggregateStatsSecurity(self): + node_ids = [] + gradients = [[1.0]] + hessians = [[100.0]] + feature_indices = [[0, 0, 0]] + feature_values = [0, 0, 0] + feature_shape = [0, 0, 0] + max_splits = 100 + num_buckets = 100 + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + gen_boosted_trees_ops.boosted_trees_sparse_aggregate_stats( + node_ids=node_ids, + gradients=gradients, + hessians=hessians, + feature_indices=feature_indices, + feature_values=feature_values, + feature_shape=feature_shape, + max_splits=max_splits, + num_buckets=num_buckets) + class BestMultiDimFeatureSplitMultiClassV2Op(StatsOpsTest): """Tests multi-class/multi-regression for best splits using V2 op.""" diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py 
index d0c4fea8eb47ab..14a42e780459ef 100644 --- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py @@ -24,6 +24,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import gradient_checker @@ -460,6 +461,16 @@ def testKernelSizeMatchesInputSize(self): padding="VALID", expected=[1.5625, 1.875]) + def DISABLED_testZeroSizedFilterThrowsIllegalArgument(self): + tensor_in_sizes = [1, 1, 1, 1, 1] + x1 = self._CreateNumpyTensor(tensor_in_sizes) + filter_in = np.ones((1, 1, 0, 1, 1), dtype=np.float32) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, "filter must not have zero elements" + "|has a non-positive dimension"): + self.evaluate( + nn_ops.conv3d(x1, filter_in, strides=[1, 1, 1, 1, 1], padding="SAME")) + def _ConstructAndTestGradientForConfig( self, batch, input_shape, filter_shape, in_depth, out_depth, stride, padding, test_input, data_format, use_gpu): diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py index dd033121329f08..c6631f195851b0 100644 --- a/tensorflow/python/kernel_tests/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_test.py @@ -2522,139 +2522,83 @@ def testShapeFunctionEdgeCases(self): strides=[1, 1, 1, 1], padding=[0, 0, 0, 0]) - @test_util.deprecated_graph_mode_only - def testOpEdgeCases(self): - with self.cached_session() as sess: - # Illegal strides. - with self.assertRaisesRegex(errors_impl.UnimplementedError, - "strides in the batch and depth"): - input_placeholder = array_ops.placeholder(dtypes.float32) - input_val = np.ones([10, 10]) - filter_placeholder = array_ops.placeholder(dtypes.float32) - filter_val = np.ones([10, 10]) - sess.run( - nn_ops.conv2d( - input_placeholder, - filter_placeholder, - strides=[2, 1, 1, 1], - padding="SAME"), - feed_dict={ - input_placeholder: input_val, - filter_placeholder: filter_val - }) - with self.assertRaisesRegex(errors_impl.UnimplementedError, - "strides in the batch and depth"): - input_placeholder = array_ops.placeholder(dtypes.float32) - filter_placeholder = array_ops.placeholder(dtypes.float32) - input_val = np.ones([10, 10]) - filter_val = np.ones([10, 10]) - sess.run( - nn_ops.conv2d( - input_placeholder, - filter_placeholder, - strides=[1, 1, 1, 2], - padding="SAME"), - feed_dict={ - input_placeholder: input_val, - filter_placeholder: filter_val - }) - - # Filter larger than input. 
- with self.assertRaisesRegex(ValueError, "Negative dimension size"): - input_placeholder = array_ops.placeholder( - dtypes.float32, shape=[32, 20, 20, 3]) - input_val = np.ones([32, 20, 20, 3]) - filter_placeholder = array_ops.placeholder( - dtypes.float32, shape=[20, 21, 3, 2]) - filter_val = np.ones([20, 21, 3, 2]) - - sess.run( - nn_ops.conv2d( - input_placeholder, - filter_placeholder, - strides=[1, 1, 1, 1], - padding="VALID"), - feed_dict={ - input_placeholder: input_val, - filter_placeholder: filter_val - }) - with self.assertRaisesRegex(ValueError, "Negative dimension size"): - input_placeholder = array_ops.placeholder( - dtypes.float32, shape=[32, 20, 20, 3]) - input_val = np.ones([32, 20, 20, 3]) - filter_placeholder = array_ops.placeholder( - dtypes.float32, shape=[21, 20, 3, 2]) - filter_val = np.ones([21, 20, 3, 2]) - sess.run( - nn_ops.conv2d( - input_placeholder, - filter_placeholder, - strides=[1, 1, 1, 1], - padding="VALID"), - feed_dict={ - input_placeholder: input_val, - filter_placeholder: filter_val - }) - - # Filter larger than input + padding. - with self.assertRaisesRegex(ValueError, "Negative dimension size"): - input_placeholder = array_ops.placeholder( - dtypes.float32, shape=[32, 20, 20, 3]) - input_val = np.ones([32, 20, 20, 3]) - filter_placeholder = array_ops.placeholder( - dtypes.float32, shape=[24, 25, 3, 2]) - filter_val = np.ones([24, 25, 3, 2]) - sess.run( - nn_ops.conv2d( - input_placeholder, - filter_placeholder, - strides=[1, 1, 1, 1], - padding=[[0, 0], [2, 2], [2, 2], [0, 0]]), - feed_dict={ - input_placeholder: input_val, - filter_placeholder: filter_val - }) - - # Negative padding during backprop. - with self.assertRaisesRegex( - errors_impl.InvalidArgumentError, - "All elements of explicit_paddings must be nonnegative"): - filter_placeholder = array_ops.placeholder( - dtypes.float32, shape=[18, 18, 3, 2]) - filter_val = np.ones([18, 18, 3, 2]) - out_backprop = array_ops.placeholder( - dtypes.float32, shape=[32, 3, 2, 2]) - out_backprop_val = np.ones([32, 3, 2, 2]) - sess.run( - nn_ops.conv2d_backprop_input([32, 20, 20, 3], - filter_placeholder, - out_backprop, - strides=[1, 1, 1, 1], - padding=[[0, 0], [-1, 0], [0, 0], - [0, 0]]), - feed_dict={ - filter_placeholder: filter_val, - out_backprop: out_backprop_val - }) - with self.assertRaisesRegex( - errors_impl.InvalidArgumentError, - "All elements of explicit_paddings must be nonnegative"): - input_placeholder = array_ops.placeholder( - dtypes.float32, shape=[32, 20, 20, 3]) - input_val = np.ones([32, 20, 20, 3]) - out_backprop = array_ops.placeholder( - dtypes.float32, shape=[32, 3, 2, 2]) - out_backprop_val = np.ones([32, 3, 2, 2]) - sess.run( - nn_ops.conv2d_backprop_filter( - input_placeholder, [18, 18, 3, 2], - out_backprop, - strides=[1, 1, 1, 1], - padding=[[0, 0], [-1, 0], [0, 0], [0, 0]]), - feed_dict={ - input_placeholder: input_val, - out_backprop: out_backprop_val - }) + def DISABLED_testOpEdgeCases(self): + # Illegal strides. 
+ with self.assertRaisesRegex((ValueError, errors_impl.UnimplementedError), + "strides in the batch and depth"): + input_val = np.ones([2, 4, 10, 10]) + filter_val = np.ones([2, 4, 10, 10]) + self.evaluate( + nn_ops.conv2d( + input_val, filter_val, strides=[2, 1, 1, 1], padding="SAME")) + with self.assertRaisesRegex((ValueError, errors_impl.UnimplementedError), + "strides in the batch and depth"): + input_val = np.ones([2, 4, 10, 10]) + filter_val = np.ones([2, 4, 10, 10]) + self.evaluate( + nn_ops.conv2d( + input_val, filter_val, strides=[1, 1, 1, 2], padding="SAME")) + + # TODO(b/195689143): Will enable when fixed for V2 behavior + # # Filter larger than input. + # with self.assertRaisesRegex(ValueError, "Negative dimension size"): + # input_val = np.ones([32, 20, 20, 3]) + # filter_val = np.ones([20, 21, 3, 2]) + # self.evaluate( + # nn_ops.conv2d( + # input_val, filter_val, strides=[1, 1, 1, 1], padding="VALID")) + # with self.assertRaisesRegex(ValueError, "Negative dimension size"): + # input_val = np.ones([32, 20, 20, 3]) + # filter_val = np.ones([21, 20, 3, 2]) + # self.evaluate( + # nn_ops.conv2d( + # input_val, filter_val, strides=[1, 1, 1, 1], padding="VALID")) + # + # # Filter larger than input + padding. + # with self.assertRaisesRegex(ValueError, "Negative dimension size"): + # input_val = np.ones([32, 20, 20, 3]) + # filter_val = np.ones([24, 25, 3, 2]) + # self.evaluate( + # nn_ops.conv2d( + # input_val, + # filter_val, + # strides=[1, 1, 1, 1], + # padding=[[0, 0], [2, 2], [2, 2], [0, 0]])) + + # Filter dimensions must be greater than 0. + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, "filter must not have zero elements" + "|has a non-positive dimension"): + input_val = np.ones([1, 1, 1, 1]) + filter_val = np.ones([1, 0, 1, 1]) + self.evaluate( + nn_ops.conv2d( + input_val, filter_val, strides=[1, 1, 1, 1], padding="SAME")) + + # Negative padding during backprop. + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + "All elements of explicit_paddings must be nonnegative"): + filter_val = np.ones([18, 18, 3, 2]) + out_backprop_val = np.ones([32, 3, 2, 2]) + self.evaluate( + nn_ops.conv2d_backprop_input([32, 20, 20, 3], + filter_val, + out_backprop_val, + strides=[1, 1, 1, 1], + padding=[[0, 0], [-1, 0], [0, 0], [0, + 0]])) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + "All elements of explicit_paddings must be nonnegative"): + input_val = np.ones([32, 20, 20, 3]) + out_backprop_val = np.ones([32, 3, 2, 2]) + self.evaluate( + nn_ops.conv2d_backprop_filter( + input_val, [18, 18, 3, 2], + out_backprop_val, + strides=[1, 1, 1, 1], + padding=[[0, 0], [-1, 0], [0, 0], [0, 0]])) class DepthwiseConv2DTest(test.TestCase): @@ -2664,10 +2608,10 @@ def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, """Verifies the output values of the convolution function. Args: - tensor_in_sizes: Input tensor dimensions in - [batch, input_rows, input_cols, input_depth]. - filter_in_sizes: Filter tensor dimensions in - [filter_rows, filter_cols, input_depth, depth_multiplier]. + tensor_in_sizes: Input tensor dimensions in [batch, input_rows, + input_cols, input_depth]. + filter_in_sizes: Filter tensor dimensions in [filter_rows, filter_cols, + input_depth, depth_multiplier]. stride: Stride. padding: Padding type. expected: An array containing the expected operation outputs. 
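The zero-size-filter checks exercised by `DISABLED_testZeroSizedFilterThrowsIllegalArgument` and `DISABLED_testOpEdgeCases` above are reachable from the public API. A minimal sketch of the repro, assuming a build that contains the fix (the error class differs between eager and graph execution, hence the tuple):

```python
import numpy as np
import tensorflow as tf

x = np.ones([1, 1, 1, 1], np.float32)
w = np.ones([1, 0, 1, 1], np.float32)  # zero filter width: no elements

try:
  tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding="SAME")
except (ValueError, tf.errors.InvalidArgumentError) as e:
  # The convolution kernels now reject filters with zero elements instead
  # of computing with a zero-sized dimension.
  print(type(e).__name__)
```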
diff --git a/tensorflow/python/kernel_tests/distributions/BUILD b/tensorflow/python/kernel_tests/distributions/BUILD index 4b0915b37e7784..20511e9f7b96e2 100644 --- a/tensorflow/python/kernel_tests/distributions/BUILD +++ b/tensorflow/python/kernel_tests/distributions/BUILD @@ -63,6 +63,9 @@ cuda_py_test( size = "small", srcs = ["beta_test.py"], tfrt_enabled = True, + tags = [ + "no_oss", + ], deps = [ "//tensorflow/python:client", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index f2348c6c7acab1..5dc9f7ea05214f 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -23,6 +23,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import test_util @@ -542,6 +543,20 @@ def testMixedDType(self): constant_op.constant(4, dtype=dtypes.int32), dtype=dtypes.int64) self.assertAllEqual(self.evaluate(tf_ans), np.array([0, 1, 2, 3])) + def testLargeStarts(self): + # Test case for GitHub issue 46899. + with self.session(): + with self.assertRaises(errors_impl.InternalError): + v = math_ops.range(start=-1e+38, limit=1) + self.evaluate(v) + + def testLargeLimits(self): + # Test case for GitHub issue 46913. + with self.session(): + with self.assertRaises(errors_impl.ResourceExhaustedError): + v = math_ops.range(0, 9223372036854775807) + self.evaluate(v) + # TODO(vrv): move to sequence_ops_test? class LinSpaceTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py index f2f6dc33b84a9e..81c4bbbf05d818 100644 --- a/tensorflow/python/kernel_tests/list_ops_test.py +++ b/tensorflow/python/kernel_tests/list_ops_test.py @@ -1425,7 +1425,7 @@ def testConcatWithUninitializedTensorsFailsIfNoElementShape(self): with self.assertRaisesRegex( errors.InvalidArgumentError, r"Trying to concat list with only uninitialized tensors " - r"but element_shape_except_first_dim_ is not fully defined"): + r"but element_shape_except_first_dim is not fully defined"): t = list_ops.tensor_list_concat(l, element_dtype=dtypes.float32) self.evaluate(t) diff --git a/tensorflow/python/kernel_tests/lookup_ops_test.py b/tensorflow/python/kernel_tests/lookup_ops_test.py index d564da12c27260..c2ff260d7d5c68 100644 --- a/tensorflow/python/kernel_tests/lookup_ops_test.py +++ b/tensorflow/python/kernel_tests/lookup_ops_test.py @@ -3375,6 +3375,71 @@ def testMutableHashTableFindHighRank(self): result = self.evaluate(output) self.assertAllEqual([[0, 1], [-1, -1]], result) + def testMutableHashTableFindWithInvalidShapeDefaultValue(self): + default_val = [-1, -1] + table = lookup_ops.MutableHashTable(dtypes.string, dtypes.int64, + default_val) + + input_string = constant_op.constant([["brain", "salad"], + ["tank", "tarkus"]]) + + invalid_default_val = constant_op.constant( + [[-2, -3], [-4, -5], [-6, -7], [-8, -9]], dtypes.int64) + + with self.assertRaisesRegex( + (ValueError, errors_impl.InvalidArgumentError), + "Expected shape \[2\] or \[2,2,2\] for default value, got \[4,2]"): + self.evaluate(table.lookup(input_string, invalid_default_val)) + + invalid_default_val = constant_op.constant([[[-2, -3], [-4, -5]]], + dtypes.int64) + with self.assertRaisesRegex( + (ValueError, 
errors_impl.InvalidArgumentError), + "Expected shape \[2\] or \[2,2,2\] for default value, got \[1,2,2\]"): + self.evaluate(table.lookup(input_string, invalid_default_val)) + + def testMutableHashTableFindHighRankScalarWithDynamicDefaultValue(self): + default_val = -1 + keys = constant_op.constant(["brain", "salad", "surgery"]) + values = constant_op.constant([0, 1, 2], dtypes.int64) + table = lookup_ops.MutableHashTable(dtypes.string, dtypes.int64, + default_val) + + self.evaluate(table.insert(keys, values)) + self.assertAllEqual(3, self.evaluate(table.size())) + + input_string = constant_op.constant([["brain", "salad"], + ["tank", "tarkus"]]) + + dynamic_default_val = constant_op.constant([[-2, -3], [-4, -5]], + dtypes.int64) + output = table.lookup(input_string, dynamic_default_val) + self.assertAllEqual([2, 2], output.get_shape()) + + result = self.evaluate(output) + self.assertAllEqual([[0, 1], [-4, -5]], result) + + def testMutableHashTableFindHighRankVectorWithDynamicDefaultValue(self): + default_val = [-1, -1] + keys = constant_op.constant(["brain", "salad", "surgery"]) + values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int64) + table = lookup_ops.MutableHashTable(dtypes.string, dtypes.int64, + default_val) + + self.evaluate(table.insert(keys, values)) + self.assertAllEqual(3, self.evaluate(table.size())) + + input_string = constant_op.constant([["brain", "salad"], + ["tank", "tarkus"]]) + + dynamic_default_val = constant_op.constant( + [[[-2, -3], [-4, -5]], [[-6, -7], [-8, -9]]], dtypes.int64) + output = table.lookup(input_string, dynamic_default_val) + self.assertAllEqual([2, 2, 2], output.get_shape()) + + result = self.evaluate(output) + self.assertAllEqual([[[0, 1], [2, 3]], [[-6, -7], [-8, -9]]], result) + def testMutableHashTableInsertHighRank(self): default_val = -1 keys = constant_op.constant([["brain", "salad"], ["surgery", "tank"]]) diff --git a/tensorflow/python/kernel_tests/matrix_solve_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_op_test.py index 209e60417da62e..2bfbc2f14b3877 100644 --- a/tensorflow/python/kernel_tests/matrix_solve_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_solve_op_test.py @@ -112,6 +112,12 @@ def testWrongDimensions(self): with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): self.evaluate(linalg_ops.matrix_solve(matrix, rhs)) + # The matrix and right-hand side should have the same batch dimensions + matrix = np.random.normal(size=(2, 6, 2, 2)) + rhs = np.random.normal(size=(2, 3, 2, 2)) + with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): + self.evaluate(linalg_ops.matrix_solve(matrix, rhs)) + def testNotInvertible(self): # The input should be invertible. with self.assertRaisesOpError("Input matrix is not invertible."): diff --git a/tensorflow/python/kernel_tests/pooling_ops_3d_test.py b/tensorflow/python/kernel_tests/pooling_ops_3d_test.py index 051f7e1168a72b..83d26e27d9849f 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_3d_test.py @@ -21,6 +21,7 @@ import numpy as np from tensorflow.python.framework import constant_op +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl @@ -505,6 +506,19 @@ def testAvgPoolGradSamePadding3_1_3d(self): strides=(1, 1, 1), padding="SAME") + def testMaxPool3DZeroPoolSize(self): + # Test case for GitHub issue 51936. 
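+ # A ksize entry of 0 gives an empty pooling window; average pooling divides + # by the window volume, so without this validation the kernel could hit a + # division by zero.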
+ for f in [nn_ops.max_pool3d, nn_ops.avg_pool3d]: + with self.session(): + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + input_sizes = [3, 4, 10, 11, 12] + + input_data = 1. + input_tensor = constant_op.constant( + input_data, shape=input_sizes, name="input") + pool_3d = f(input_tensor, ksize=[2, 2, 0], strides=1, padding="VALID") + self.evaluate(pool_3d) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 6a9350bd3dafb2..5da824e0d406b7 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -267,6 +267,20 @@ def testDataInvalid(self): data=np.uint16(10), segment_ids=np.array([]).astype("int64")) self.evaluate(s) + def testInvalidIds(self): + # Test case for GitHub issue 46888. + for op in [ + math_ops.segment_max, + math_ops.segment_min, + math_ops.segment_mean, + math_ops.segment_sum, + math_ops.segment_prod, + ]: + with self.cached_session(use_gpu=False): + with self.assertRaises((ValueError, errors_impl.InternalError)): + s = op(data=np.ones((1, 10, 1)), segment_ids=[1676240524292489355]) + self.evaluate(s) + class UnsortedSegmentTest(SegmentReductionHelper): diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py index 5a165c9454230c..38d69be486fe67 100644 --- a/tensorflow/python/kernel_tests/shape_ops_test.py +++ b/tensorflow/python/kernel_tests/shape_ops_test.py @@ -723,6 +723,17 @@ def testShapeFunctionEdgeCases(self): inp, array_ops.placeholder( dtypes.int32, shape=[3])) + def testLargeTensor(self): + # Test case for GitHub issue 46911. + if test_util.is_xla_enabled(): + # The following test fails with XLA enabled.
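+ # (The InternalError this test expects is not raised when XLA is enabled, + # so the test exits early rather than producing a false failure.)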
+ return + with self.assertRaises(errors_impl.InternalError): + with self.cached_session(): + tiled = array_ops.tile( + np.ones((1, 1, 1)), [100000000, 100000000, 100000000]) + self.evaluate(tiled) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py index 5a48eb825dbfa8..434287679181bb 100644 --- a/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py +++ b/tensorflow/python/kernel_tests/sparse_serialization_ops_test.py @@ -20,10 +20,12 @@ import numpy as np +from tensorflow.python.eager import def_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import test @@ -464,6 +466,18 @@ def testDeserializeManyFailsInvalidProto(self): self._testDeserializeFailsInvalidProtoHelper( sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse) + def testDeserializeInvalidVariant(self): + mu = gen_resource_variable_ops.mutex_v2() + mu_lock = gen_resource_variable_ops.mutex_lock(mutex=mu) + + @def_function.function + def f(): + return sparse_ops.deserialize_sparse( + serialized_sparse=mu_lock, dtype=dtypes.int32) + + with self.assertRaisesRegex(ValueError, r"Shape must be at least rank 1"): + f() + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/split_op_test.py b/tensorflow/python/kernel_tests/split_op_test.py index 16f92dbd875b15..1658eab4bd63a5 100644 --- a/tensorflow/python/kernel_tests/split_op_test.py +++ b/tensorflow/python/kernel_tests/split_op_test.py @@ -387,6 +387,24 @@ def testNonexistentDimTensor(self): "must have exactly one element"): sess.run(y, {x: np.array([], dtype=np.int32), splits: [4, 11, 15]}) + @test_util.run_in_graph_and_eager_modes + def testNegativeSizes(self): + x = constant_op.constant([1, 2, 3], dtypes.float32) + # A size of -1 means that dimension's size is inferred from the sum of the other splits. + with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError), + "Split size at index 1 must be >= 0. 
Got: -2"): + splits = [-1, -2] + self.evaluate(array_ops.split(x, splits, axis=0)) + + @test_util.run_in_graph_and_eager_modes + def testBadSplitSizes(self): + x = constant_op.constant([1, 2], dtypes.float32) + with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError), + "Determined shape must either match input" + "|can't split axis"): + splits = [1, 2] + self.evaluate(array_ops.split(x, splits, axis=0)) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/stack_op_test.py b/tensorflow/python/kernel_tests/stack_op_test.py index 8237ce228af02a..c8e41a19201950 100644 --- a/tensorflow/python/kernel_tests/stack_op_test.py +++ b/tensorflow/python/kernel_tests/stack_op_test.py @@ -20,12 +20,16 @@ import numpy as np +from tensorflow.python import tf2 +from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -76,6 +80,19 @@ def testSimpleParallelCPU(self): self.assertAllEqual(c, data) @test_util.run_deprecated_v1 + def testParallelConcatShapeZero(self): + if not tf2.enabled(): + self.skipTest("only fails in TF2") + + @def_function.function + def f(): + y = gen_array_ops.parallel_concat(values=[["tf"]], shape=0) + return y + + with self.assertRaisesRegex(errors.InvalidArgumentError, + r"0th dimension of value .* is less than"): + f() + def testSimpleParallelGPU(self): np.random.seed(7) with self.session(use_gpu=True): diff --git a/tensorflow/python/kernel_tests/substr_op_test.py b/tensorflow/python/kernel_tests/substr_op_test.py index 9302152e82bfa9..eae4e10f378567 100644 --- a/tensorflow/python/kernel_tests/substr_op_test.py +++ b/tensorflow/python/kernel_tests/substr_op_test.py @@ -492,6 +492,15 @@ def testInvalidUnit(self): with self.assertRaises(ValueError): string_ops.substr(b"test", 3, 1, unit="UTF8") + def testInvalidPos(self): + # Test case for GitHub issue 46900. + with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): + x = string_ops.substr(b"abc", len=1, pos=[1, -1]) + self.evaluate(x) + + with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): + x = string_ops.substr(b"abc", len=1, pos=[1, 2]) + self.evaluate(x) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/summary_ops_test.py b/tensorflow/python/kernel_tests/summary_ops_test.py index 0cf35b9e342fc1..3009541a6b6094 100644 --- a/tensorflow/python/kernel_tests/summary_ops_test.py +++ b/tensorflow/python/kernel_tests/summary_ops_test.py @@ -1206,6 +1206,26 @@ def f(): # Reset to default state for other tests. 
summary_ops.set_step(None) + @test_util.run_v2_only + def testTrace_withProfiler(self): + + @def_function.function + def f(): + x = constant_op.constant(2) + y = constant_op.constant(3) + return x**y + + assert context.executing_eagerly() + logdir = self.get_temp_dir() + writer = summary_ops.create_file_writer(logdir) + summary_ops.trace_on(graph=True, profiler=True) + profiler_outdir = self.get_temp_dir() + with writer.as_default(): + f() + summary_ops.trace_export( + name='foo', step=1, profiler_outdir=profiler_outdir) + writer.close() + def events_from_file(filepath): """Returns all events in a single event file. diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py index 4d0f6507aef58c..892c5855e581cc 100644 --- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py +++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py @@ -147,6 +147,47 @@ def testEmptyTensorArrayPack(self): c0 = self.evaluate(c0) self.assertAllEqual([3, 0, 1], c0.shape) + def testTensorArrayWriteConcatInParallel(self): + with self.session(use_gpu=True): + + def _concat_1(): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.int32, size=2, infer_shape=False) + w0 = ta.write(0, constant_op.constant([1])) + w1 = w0.write(1, constant_op.constant([], + shape=(0,), + dtype=dtypes.int32)) + return w1.concat() + + def _concat_2(): + ta = tensor_array_ops.TensorArray( + dtype=dtypes.int32, size=3, infer_shape=False) + w0 = ta.write(0, constant_op.constant([8])) + w1 = w0.write(1, constant_op.constant([], + shape=(0,), + dtype=dtypes.int32)) + w2 = w1.write(2, constant_op.constant([9])) + return w2.concat() + + def _write(index, output): + elements = control_flow_ops.cond( + math_ops.less(index, 3), _concat_1, _concat_2) + return (index + 1, output.write(index, elements)) + + num_iterations = 6 + init_state = (0, + tensor_array_ops.TensorArray( + dtype=dtypes.int32, + size=num_iterations, + infer_shape=False)) + _, final_state = control_flow_ops.while_loop( + lambda i, _: i < num_iterations, _write, init_state) + + c0 = final_state.concat() + + c0 = self.evaluate(c0) + self.assertAllEqual([1, 1, 1, 8, 9, 8, 9, 8, 9], c0) + def _testTensorArrayWriteConcat(self, tf_dtype): with self.cached_session(use_gpu=True): ta = tensor_array_ops.TensorArray( diff --git a/tensorflow/python/kernel_tests/transpose_op_test.py b/tensorflow/python/kernel_tests/transpose_op_test.py index 87096211a01494..ed634ae7543b54 100644 --- a/tensorflow/python/kernel_tests/transpose_op_test.py +++ b/tensorflow/python/kernel_tests/transpose_op_test.py @@ -387,6 +387,8 @@ def testDouble(self): @test_util.run_v1_only("b/120545219") def testComplex64(self): + self._testBoth(np.array(np.complex(1, 2)).astype(np.complex64)) + self._testBoth(np.complex(1, 2) * np.arange(0, 21).astype(np.complex64)) self._testBoth( np.complex(1, 2) * np.arange(0, 21).reshape([3, 7]).astype(np.complex64)) @@ -399,6 +401,8 @@ def testComplex64(self): @test_util.run_v1_only("b/120545219") def testComplex128(self): + self._testBoth(np.array(np.complex(1, 2)).astype(np.complex128)) + self._testBoth(np.complex(1, 2) * np.arange(0, 21).astype(np.complex128)) self._testBoth( np.complex(1, 2) * np.arange(0, 21).reshape([3, 7]).astype(np.complex128)) diff --git a/tensorflow/python/kernel_tests/tridiagonal_matmul_op_test.py b/tensorflow/python/kernel_tests/tridiagonal_matmul_op_test.py index 456f13e86a7079..175c68d7b3ad0d 100644 --- a/tensorflow/python/kernel_tests/tridiagonal_matmul_op_test.py +++ 
b/tensorflow/python/kernel_tests/tridiagonal_matmul_op_test.py @@ -23,12 +23,15 @@ import numpy as np from tensorflow.python.client import session +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradient_checker_v2 +from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.ops.linalg import linalg_impl @@ -179,6 +182,37 @@ def testGradientComplexWithBatches(self): rhs = self._randomComplexArray((b, m, n)) self._gradientTest(diags, rhs, dtype=dtypes.complex128) + def _testErrorWithShapesEager(self, exception_regex, superdiag_shape, + maindiag_shape, subdiag_shape, rhs_shape): + with context.eager_mode(): + superdiag = array_ops.ones(superdiag_shape) + maindiag = array_ops.ones(maindiag_shape) + subdiag = array_ops.ones(subdiag_shape) + rhs = array_ops.ones(rhs_shape) + with self.assertRaisesRegex(errors_impl.InvalidArgumentError, + exception_regex): + linalg_ops.tridiagonal_mat_mul(superdiag, maindiag, subdiag, rhs) + + def testInvalidShapesEagerGpu(self): + if not test.is_gpu_available(): + self.skipTest('Test requires GPU') + self._testErrorWithShapesEager('Input must have rank >= 2, but got ', + [2], [2], [2], [2]) + self._testErrorWithShapesEager( + 'superdiag must have same rank as rhs, but got 3 and 2', + [2, 1, 2], [2, 1], [2, 1], [2, 2]) + self._testErrorWithShapesEager( + 'maindiag must have same outer dimensions as rhs, but for index 0, got ' + '3 and 2', + [2, 1, 2], [3, 1, 2], [2, 1, 2], [2, 2, 2]) + self._testErrorWithShapesEager( + "subdiag's second-to-last dimension must be 1, but got 3", + [2, 1, 2], [2, 1, 2], [2, 3, 2], [2, 2, 2]) + self._testErrorWithShapesEager( + "subdiag's last dimension size must be rhs's second-to-last dimension " + "size, but got 3 and 2", + [2, 1, 2], [2, 1, 2], [2, 1, 3], [2, 2, 2]) + # Benchmark class TridiagonalMatMulBenchmark(test.Benchmark): sizes = [(100000, 1, 1), (1000000, 1, 1), (10000000, 1, 1), (100000, 10, 1), diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py index 6e60a935e9350d..610510e5604ae5 100644 --- a/tensorflow/python/kernel_tests/xent_op_test.py +++ b/tensorflow/python/kernel_tests/xent_op_test.py @@ -195,6 +195,13 @@ def testShapeMismatch(self): gen_nn_ops.softmax_cross_entropy_with_logits( [[0., 1.], [2., 3.]], [[0., 1., 0.], [1., 0., 0.]]) + tf_f = constant_op.constant(np.array([[1.]]).astype(np.float32)) + tf_l = constant_op.constant(np.array([[1.], [1.]]).astype(np.float32)) + tf_loss, tf_gradient = gen_nn_ops.softmax_cross_entropy_with_logits( + tf_f, tf_l) + self.assertAllClose([0, 0], tf_loss) + self.assertAllCloseAccordingToType([[0], [0]], tf_gradient) + @test_util.run_deprecated_v1 def testNotMatrix(self): with self.cached_session(): diff --git a/tensorflow/python/lib/core/ndarray_tensor.cc b/tensorflow/python/lib/core/ndarray_tensor.cc index 03fbea397485e8..6cf51ceebbdaaa 100644 --- a/tensorflow/python/lib/core/ndarray_tensor.cc +++ b/tensorflow/python/lib/core/ndarray_tensor.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/python/lib/core/ndarray_tensor.h" #include +#include #include "tensorflow/c/eager/tfe_context_internal.h" #include "tensorflow/c/tf_tensor_internal.h" @@ -74,6 +75,13 @@ Status PyArrayDescr_to_TF_DataType(PyArray_Descr* descr, PyObject* key; PyObject* value; Py_ssize_t pos = 0; + + // Return an error if the fields attribute is null. + // Occurs with an improper conversion attempt to resource. + if (descr->fields == nullptr) { + return errors::Internal("Unexpected numpy data type"); + } + if (PyDict_Next(descr->fields, &pos, &key, &value)) { // In Python 3, the keys of numpy custom struct types are unicode, unlike // Python 2, where the keys are bytes. diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 67a7c7a0e94b98..3b7cb815f139cd 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -4668,6 +4668,7 @@ def reverse_sequence(input, @tf_export("reverse_sequence", v1=[]) +@dispatch.add_dispatch_support def reverse_sequence_v2(input, seq_lengths, seq_axis=None, diff --git a/tensorflow/python/ops/bincount_ops_test.py b/tensorflow/python/ops/bincount_ops_test.py index 5bc9bc1ab777cf..1ef5e10e6608f3 100644 --- a/tensorflow/python/ops/bincount_ops_test.py +++ b/tensorflow/python/ops/bincount_ops_test.py @@ -836,6 +836,25 @@ def test_ragged_input_different_shape_fails(self): self.evaluate(bincount_ops.sparse_bincount(x, weights=weights, axis=-1)) +class RawOpsHeapOobTest(test.TestCase, parameterized.TestCase): + + @test_util.run_v1_only("Test security error") + def testSparseCountSparseOutputBadIndicesShapeTooSmall(self): + indices = [1] + values = [[1]] + weights = [] + dense_shape = [10] + with self.assertRaisesRegex(ValueError, + "Shape must be rank 2 but is rank 1 for"): + self.evaluate( + gen_count_ops.SparseCountSparseOutput( + indices=indices, + values=values, + dense_shape=dense_shape, + weights=weights, + binary_output=True)) + + @test_util.run_all_in_graph_and_eager_modes @test_util.disable_tfrt class RawOpsTest(test.TestCase, parameterized.TestCase): diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 320facf5afa2d1..d5d8a2d85c5789 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -2242,6 +2242,21 @@ def testNameScope(self): y = image_ops.pad_to_bounding_box(image, 0, 0, 55, 66) self.assertTrue(y.op.name.startswith("pad_to_bounding_box")) + def testInvalidInput(self): + # Test case for GitHub issue 46890. + if test_util.is_xla_enabled(): + # TODO(b/200850176): test fails with XLA. 
+ return + with self.session(): + with self.assertRaises(errors.InternalError): + v = image_ops.pad_to_bounding_box( + image=np.ones((1, 1, 1)), + target_height=5191549470, + target_width=5191549470, + offset_height=1, + offset_width=1) + self.evaluate(v) + class SelectDistortedCropBoxTest(test_util.TensorFlowTestCase): @@ -3149,6 +3164,14 @@ def testPreserveAspectRatioSquare(self): self._assertResizeCheckShape(x, x_shape, [320, 320], [320, 320, 3]) + def testLargeDim(self): + with self.session(): + with self.assertRaises(errors.InternalError): + x = np.ones((5, 1, 1, 2)) + v = image_ops.resize_images_v2(x, [1610637938, 1610637938], + image_ops.ResizeMethod.BILINEAR) + _ = self.evaluate(v) + class ResizeImagesTest(test_util.TensorFlowTestCase, parameterized.TestCase): @@ -5769,6 +5792,16 @@ def testImageCropAndResize(self): crop_size=[1, 1]) self.evaluate(op) + def DISABLED_testImageCropAndResizeWithInvalidInput(self): + with self.session(): + with self.assertRaises((errors.InternalError, ValueError)): + op = image_ops_impl.crop_and_resize_v2( + image=np.ones((1, 1, 1, 1)), + boxes=np.ones((11, 4)), + box_indices=np.ones((11)), + crop_size=[2065374891, 1145309325]) + self.evaluate(op) + @parameterized.named_parameters( ("_jpeg", "JPEG", "jpeg_merge_test1.jpg"), ("_png", "PNG", "lena_rgba.png"), diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index f99102fee52ac4..145c2b0195cdfb 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -1849,7 +1849,7 @@ def remove(self, keys, name=None): return op - def lookup(self, keys, name=None): + def lookup(self, keys, dynamic_default_values=None, name=None): """Looks up `keys` in a table, outputs the corresponding values. The `default_value` is used for keys not present in the table. @@ -1857,6 +1857,23 @@ def lookup(self, keys, name=None): Args: keys: Keys to look up. Can be a tensor of any shape. Must match the table's key_dtype. + dynamic_default_values: The values to use if a key is missing in the + table. If None (by default), the `table.default_value` will be used. + The shape of `dynamic_default_values` must match the shape of + `table.default_value` or of the lookup result tensor. + In the latter case, each key will have a different default value. + + For example: + + ```python + keys = [0, 1, 3] + dynamic_default_values = [[1, 3, 4], [2, 3, 9], [8, 3, 0]] + + # The key '0' will use [1, 3, 4] as default value. + # The key '1' will use [2, 3, 9] as default value. + # The key '3' will use [8, 3, 0] as default value. + ``` + name: A name for the operation (optional).
Returns: @@ -1870,8 +1887,9 @@ def lookup(self, keys, name=None): (self.resource_handle, keys, self._default_value)): keys = ops.convert_to_tensor(keys, dtype=self._key_dtype, name="keys") with ops.colocate_with(self.resource_handle): - values = gen_lookup_ops.lookup_table_find_v2(self.resource_handle, keys, - self._default_value) + values = gen_lookup_ops.lookup_table_find_v2( + self.resource_handle, keys, dynamic_default_values + if dynamic_default_values is not None else self._default_value) return values def insert(self, keys, values, name=None): diff --git a/tensorflow/python/ops/nn_fused_batchnorm_test.py b/tensorflow/python/ops/nn_fused_batchnorm_test.py index 0421829bff338b..1f4663d495a757 100644 --- a/tensorflow/python/ops/nn_fused_batchnorm_test.py +++ b/tensorflow/python/ops/nn_fused_batchnorm_test.py @@ -20,10 +20,13 @@ import numpy as np +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops @@ -669,6 +672,126 @@ def testBatchNormGradGradConfig6(self): } self._testBatchNormGradGrad(config) + def testEagerShapeErrors(self): + with context.eager_mode(): + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((3,)) + offset = array_ops.ones((2,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'scale must have the same number of elements'): + nn_impl.fused_batch_norm(x, scale, offset) + + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + offset = array_ops.ones((3,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'offset must have the same number of elements'): + nn_impl.fused_batch_norm(x, scale, offset) + + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + offset = array_ops.ones((2,)) + mean = array_ops.ones((0,)) + variance = array_ops.ones((2,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'When is_training=false, mean must have the same number of elements'): + nn_impl.fused_batch_norm( + x, scale, offset, mean=mean, variance=variance, is_training=False) + + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + offset = array_ops.ones((2,)) + mean = array_ops.ones((2,)) + variance = array_ops.ones((0,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'When is_training=false, variance must have the same number of ' + 'elements'): + nn_impl.fused_batch_norm( + x, scale, offset, mean=mean, variance=variance, is_training=False) + + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + offset = array_ops.ones((2,)) + mean = array_ops.ones((0,)) + variance = array_ops.ones((2,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'When exponential_avg_factor != 1, mean must have the same number of ' + 'elements'): + nn_impl.fused_batch_norm( + x, + scale, + offset, + mean=mean, + variance=variance, + exponential_avg_factor=0.5) + + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + offset = array_ops.ones((2,)) + mean = array_ops.ones((2,)) + variance = array_ops.ones((0,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'When exponential_avg_factor != 1, variance 
must have the same ' + 'number of elements'): + nn_impl.fused_batch_norm( + x, + scale, + offset, + mean=mean, + variance=variance, + exponential_avg_factor=0.5) + + def testEagerShapeGradErrors(self): + with context.eager_mode(): + y_backprop = array_ops.ones((2, 2, 2, 3)) + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + reserve_space_1 = array_ops.ones((2,)) + reserve_space_2 = array_ops.ones((2,)) + with self.assertRaisesRegex(errors_impl.InvalidArgumentError, + 'x and y_backprop must have same shape,'): + gen_nn_ops.fused_batch_norm_grad_v2(y_backprop, x, scale, + reserve_space_1, reserve_space_2) + + y_backprop = array_ops.ones((2, 2, 2, 2)) + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((3,)) + reserve_space_1 = array_ops.ones((2,)) + reserve_space_2 = array_ops.ones((2,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'scale must have the same number of elements'): + gen_nn_ops.fused_batch_norm_grad_v2(y_backprop, x, scale, + reserve_space_1, reserve_space_2) + + y_backprop = array_ops.ones((2, 2, 2, 2)) + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + reserve_space_1 = array_ops.ones((3,)) + reserve_space_2 = array_ops.ones((2,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'reserve_space_1 must have the same number of elements'): + gen_nn_ops.fused_batch_norm_grad_v2(y_backprop, x, scale, + reserve_space_1, reserve_space_2) + + y_backprop = array_ops.ones((2, 2, 2, 2)) + x = array_ops.ones((2, 2, 2, 2)) + scale = array_ops.ones((2,)) + reserve_space_1 = array_ops.ones((2,)) + reserve_space_2 = array_ops.ones((3,)) + with self.assertRaisesRegex( + errors_impl.InvalidArgumentError, + 'reserve_space_2 must have the same number of elements'): + gen_nn_ops.fused_batch_norm_grad_v2(y_backprop, x, scale, + reserve_space_1, reserve_space_2) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/ops/ragged/ragged_cross_op_test.py b/tensorflow/python/ops/ragged/ragged_cross_op_test.py index 07e5964ba83497..bfe7aa31a65325 100644 --- a/tensorflow/python/ops/ragged/ragged_cross_op_test.py +++ b/tensorflow/python/ops/ragged/ragged_cross_op_test.py @@ -22,11 +22,14 @@ import numpy as np +from tensorflow.python.eager import def_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util +from tensorflow.python.ops import gen_ragged_array_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops.ragged import ragged_array_ops from tensorflow.python.ops.ragged import ragged_factory_ops @@ -362,6 +365,16 @@ def testRaggedCrossLargeBatch(self): dense_const([[2], [3]])], exception=(ValueError, errors.InvalidArgumentError), message='inputs must all have the same batch dimension size'), + dict( + testcase_name='3DDenseTensor', + inputs=[dense_const([[[1]]])], + exception=(ValueError, errors.InvalidArgumentError), + message='tf.ragged.cross only supports inputs with rank=2'), + dict( + testcase_name='0DDenseTensor', + inputs=[dense_const(1)], + exception=(ValueError, errors.InvalidArgumentError), + message='tf.ragged.cross only supports inputs with rank=2'), ]) def testStaticError(self, inputs, exception=ValueError, message=None): with self.assertRaisesRegex(exception, message): @@ -372,17 +385,36 @@ def testStaticError(self, inputs, 
exception=ValueError, message=None): testcase_name='3DRaggedTensor', inputs=[ragged_const([[[1]]], ragged_rank=1)], message='tf.ragged.cross only supports inputs with rank=2'), + dict( + testcase_name='0DDenseTensor', + inputs=[dense_const(1)], + signature=[[tensor_spec.TensorSpec(None, dtypes.int32)]], + exception=(ValueError, errors.InvalidArgumentError), + message='tf.ragged.cross only supports inputs with rank=2'), + dict( + testcase_name='1DDenseTensor', + inputs=[dense_const([1])], + signature=[[tensor_spec.TensorSpec(None, dtypes.int32)]], + exception=(ValueError, errors.InvalidArgumentError), + message='tf.ragged.cross only supports inputs with rank=2'), dict( testcase_name='3DDenseTensor', inputs=[dense_const([[[1]]])], + signature=[[tensor_spec.TensorSpec(None, dtypes.int32)]], + exception=(ValueError, errors.InvalidArgumentError), message='tf.ragged.cross only supports inputs with rank=2'), ]) def testRuntimeError(self, inputs, exception=errors.InvalidArgumentError, - message=None): + message=None, + signature=None): + @def_function.function(input_signature=signature) + def fn(x): + return ragged_array_ops.cross(x) + with self.assertRaisesRegex(exception, message): - self.evaluate(ragged_array_ops.cross(inputs)) + self.evaluate(fn(inputs)) def _ragged_to_sparse(self, t): if ragged_tensor.is_ragged(t): @@ -392,6 +424,42 @@ def _ragged_to_sparse(self, t): else: return ops.convert_to_tensor(t) + def testSparseValuesAndIndicesMustMatch(self): + with self.assertRaisesRegex( + (ValueError, errors.InvalidArgumentError), + 'sparse indices and values must have the same length'): + self.evaluate(gen_ragged_array_ops.RaggedCross( + ragged_values=[], + ragged_row_splits=[], + sparse_indices=[[5]], + sparse_values=[], + sparse_shape=[5], + dense_inputs=[['a']], + input_order='RD', + hashed_output=False, + num_buckets=5, + hash_key=2, + out_values_type=dtypes.string, + out_row_splits_type=dtypes.int64)) + + def testRaggedValuesAndSplitsMustMatch(self): + with self.assertRaisesRegex( + (ValueError, errors.InvalidArgumentError), + 'ragged values and splits must have the same length'): + self.evaluate(gen_ragged_array_ops.RaggedCross( + ragged_values=[['a']], + ragged_row_splits=[], + sparse_indices=[], + sparse_values=[], + sparse_shape=[], + dense_inputs=[['a']], + input_order='RD', + hashed_output=False, + num_buckets=5, + hash_key=2, + out_values_type=dtypes.string, + out_row_splits_type=dtypes.int64)) + if __name__ == '__main__': googletest.main() diff --git a/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py b/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py index bead4923a0a4cf..ace724ac8711d2 100644 --- a/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py +++ b/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py @@ -21,9 +21,11 @@ import numpy as np from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import map_fn as map_fn_lib from tensorflow.python.ops import math_ops as mo from tensorflow.python.ops import string_ops from tensorflow.python.ops.ragged import ragged_factory_ops @@ -309,6 +311,27 @@ def testMapOnSparseTensor(self): ) self.assertAllEqual(id_t2, [[0, 5], [0, 4]]) + def testRaggedMapWithIncorrectFnOutputSignature(self): + x = ragged_factory_ops.constant([[1, 2, 3, 4], [1]]) + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'All 
flat_values must have compatible shapes'): + y = map_fn_lib.map_fn(lambda r: map_fn_lib.map_fn(lambda y: r, r), x) + self.evaluate(y) + + def testNestedRaggedMapWithFnOutputSignature(self): + ragged1d = ragged_tensor.RaggedTensorSpec([None], dtypes.int32) + ragged2d = ragged_tensor.RaggedTensorSpec([None, None], dtypes.int32) + + x = ragged_factory_ops.constant([[1, 2, 3, 4], [1]]) + # pylint: disable=g-long-lambda + y = map_fn_lib.map_fn( + lambda r: map_fn_lib.map_fn( + lambda y: r, r, fn_output_signature=ragged1d), + x, + fn_output_signature=ragged2d) + expected = [[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], [[1]]] + self.assertAllEqual(y, expected) + if __name__ == '__main__': googletest.main() diff --git a/tensorflow/python/ops/summary_ops_v2.py b/tensorflow/python/ops/summary_ops_v2.py index db9227c97cb594..ca0be073d9487b 100644 --- a/tensorflow/python/ops/summary_ops_v2.py +++ b/tensorflow/python/ops/summary_ops_v2.py @@ -1370,4 +1370,7 @@ def trace_off(): context.context().disable_run_metadata() if profiler: - _profiler.stop() + try: + _profiler.stop() + except _profiler.ProfilerNotRunningError: + pass diff --git a/tensorflow/python/summary/writer/writer_test.py b/tensorflow/python/summary/writer/writer_test.py index 19138b1372dea6..9fcac4952f6a25 100644 --- a/tensorflow/python/summary/writer/writer_test.py +++ b/tensorflow/python/summary/writer/writer_test.py @@ -34,6 +34,7 @@ from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops from tensorflow.python.framework import test_util @@ -685,6 +686,16 @@ def testSharing_withExplicitSummaryFileWriters(self): # No more files self.assertRaises(StopIteration, lambda: next(event_paths)) + def testSummaryFileWritersInvalidInput(self): + # Test case for GitHub issue 46909 + logdir = self.get_temp_dir() + with session.Session() as sess: + with self.assertRaises(errors_impl.InvalidArgumentError): + writer = summary_ops_v2.create_file_writer( + logdir=logdir, flush_millis=[1, 2]) + sess.run(writer.init()) + sess.run(writer.flush()) + class FileWriterCacheTest(test.TestCase): """FileWriterCache tests.""" diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index 124686dff13fbe..c828718889ad4f 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -24,6 +24,7 @@ from __future__ import print_function import argparse +import ast import os import re import sys @@ -518,7 +519,7 @@ def preprocess_inputs_arg_string(inputs_str): return input_dict -def preprocess_input_exprs_arg_string(input_exprs_str): +def preprocess_input_exprs_arg_string(input_exprs_str, safe=True): """Parses input arg into dictionary that maps input key to python expression. Parses input string in the format of 'input_key=<python expression>' into a @@ -526,8 +527,10 @@ def preprocess_input_exprs_arg_string(input_exprs_str): Args: input_exprs_str: A string that specifies python expression for input keys. - Each input is separated by semicolon. For each input key: + Each input is separated by semicolon. For each input key: 'input_key=<python expression>' + safe: Whether to evaluate the python expression as literals or allow + arbitrary calls (e.g. numpy usage). Returns: A dictionary that maps input keys to their values.
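The `safe` mode documented above (and implemented in the next hunk) leans on `ast.literal_eval`, which accepts only Python literals and rejects anything involving a call or a name lookup; that distinction is what closes the `saved_model_cli` injection vector. A minimal standalone sketch of the difference, using only the standard library and not code from the patch:

```python
import ast

# literal_eval parses literals only; an expression with a call or a
# name lookup, e.g. a numpy expression, raises instead of executing.
for expr in ("[[1.0, 2.0], [3.0, 4.0]]", "np.zeros([2, 2])"):
    try:
        print(expr, "->", ast.literal_eval(expr))
    except (ValueError, SyntaxError):
        print(expr, "-> rejected: not a Python literal")
```

Under this scheme, only `--input_exprs` values that are plain literals parse in safe mode; `load_inputs_from_input_arg_string` keeps `safe=False` so the documented numpy (`np`) support continues to work, at the cost of the now explicitly documented injection risk.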
@@ -542,8 +545,15 @@ def preprocess_input_exprs_arg_string(input_exprs_str): raise RuntimeError('--input_exprs "%s" format is incorrect. Please follow' '"<input_key>=<python expression>"' % input_exprs_str) input_key, expr = input_raw.split('=', 1) - # ast.literal_eval does not work with numpy expressions - input_dict[input_key] = eval(expr) # pylint: disable=eval-used + if safe: + try: + input_dict[input_key] = ast.literal_eval(expr) + except Exception: + raise RuntimeError( + f'Expression "{expr}" is not a valid python literal.') + else: + # ast.literal_eval does not work with numpy expressions + input_dict[input_key] = eval(expr) # pylint: disable=eval-used return input_dict @@ -656,7 +666,7 @@ def load_inputs_from_input_arg_string(inputs_str, input_exprs_str, tensor_key_feed_dict = {} inputs = preprocess_inputs_arg_string(inputs_str) - input_exprs = preprocess_input_exprs_arg_string(input_exprs_str) + input_exprs = preprocess_input_exprs_arg_string(input_exprs_str, safe=False) input_examples = preprocess_input_examples_arg_string(input_examples_str) for input_tensor_key, (filename, variable_name) in inputs.items(): @@ -920,8 +930,10 @@ def add_run_subparser(subparsers): parser_run.add_argument('--inputs', type=str, default='', help=msg) msg = ('Specifying inputs by python expressions, in the format of' ' "<input_key>=\'<python expression>\'", separated by \';\'. ' 'numpy module is available as \'np\'. Please note that the expression ' 'will be evaluated as-is, and is susceptible to code injection. ' 'When this is set, the value will override duplicate input keys from ' '--inputs option.') parser_run.add_argument('--input_exprs', type=str, default='', help=msg) msg = ( 'Specifying tf.Example inputs as list of dictionaries.
For example: ' diff --git a/tensorflow/python/tools/saved_model_cli_test.py b/tensorflow/python/tools/saved_model_cli_test.py index 2580cbd73ca0a9..df43ba0bbb65a0 100644 --- a/tensorflow/python/tools/saved_model_cli_test.py +++ b/tensorflow/python/tools/saved_model_cli_test.py @@ -380,7 +380,7 @@ def testInputPreProcessFormats(self): input_expr_str = 'input3=np.zeros([2,2]);input4=[4,5]' input_dict = saved_model_cli.preprocess_inputs_arg_string(input_str) input_expr_dict = saved_model_cli.preprocess_input_exprs_arg_string( - input_expr_str) + input_expr_str, safe=False) self.assertTrue(input_dict['input1'] == ('/path/file.txt', 'ab3')) self.assertTrue(input_dict['input2'] == ('file2', None)) print(input_expr_dict['input3']) @@ -416,6 +416,11 @@ def testInputPreProcessExamplesWithStrAndBytes(self): } """, feature) + def testInputPreprocessExampleWithCodeInjection(self): + input_examples_str = 'inputs=os.system("echo hacked")' + with self.assertRaisesRegex(RuntimeError, 'not a valid python literal.'): + saved_model_cli.preprocess_input_examples_arg_string(input_examples_str) + def testInputPreProcessFileNames(self): input_str = (r'inputx=C:\Program Files\data.npz[v:0];' r'input:0=c:\PROGRA~1\data.npy') @@ -432,8 +437,8 @@ def testInputPreProcessErrorBadFormat(self): with self.assertRaises(RuntimeError): saved_model_cli.preprocess_inputs_arg_string(input_str) input_str = 'inputx:np.zeros((5))' - with self.assertRaises(RuntimeError): - saved_model_cli.preprocess_input_exprs_arg_string(input_str) + with self.assertRaisesRegex(RuntimeError, 'format is incorrect'): + saved_model_cli.preprocess_input_exprs_arg_string(input_str, safe=False) def testInputParserNPY(self): x0 = np.array([[1], [2]]) diff --git a/tensorflow/python/tpu/tpu_embedding.py b/tensorflow/python/tpu/tpu_embedding.py index 7c42bb2c41fe8d..67c1393696c366 100644 --- a/tensorflow/python/tpu/tpu_embedding.py +++ b/tensorflow/python/tpu/tpu_embedding.py @@ -52,8 +52,13 @@ # as AdagradParameters etc instead of learning_rate. class TableConfig( collections.namedtuple('TableConfig', [ - 'vocabulary_size', 'dimension', 'initializer', 'combiner', - 'hot_id_replication', 'learning_rate', 'learning_rate_fn', + 'vocabulary_size', + 'dimension', + 'initializer', + 'combiner', + 'hot_id_replication', + 'learning_rate', + 'learning_rate_fn', 'optimization_parameters', ])): """Embedding table configuration.""" @@ -85,16 +90,16 @@ def __new__(cls, hot_id_replication: If true, enables hot id replication, which can make embedding lookups faster if there are some hot rows in the table. learning_rate: float, static learning rate for this table. If - learning_rate and learning_rate_fn are both `None`, static learning - rate as specified in local `optimization_parameters` will be used. - In case local `optimization_parameters` is `None`, global + learning_rate and learning_rate_fn are both `None`, static learning rate + as specified in local `optimization_parameters` will be used. In case + local `optimization_parameters` is `None`, global `optimization_parameters` in `TPUEmbedding` constructor will be used. `learning_rate_fn` must be `None` if `learning_rate` is not `None. learning_rate_fn: string, use dynamic learning rate given by the function. This function function will be passed the current global step. If - learning_rate and learning_rate_fn are both `None`, static - learning rate as specified in `optimization_parameters` is used. - `learning_rate` must be `None` if `learning_rate_fn` is not `None. 
+ learning_rate and learning_rate_fn are both `None`, static learning rate + as specified in `optimization_parameters` is used. `learning_rate` must + be `None` if `learning_rate_fn` is not `None`. optimization_parameters: `AdagradParameters`, `AdamParameters`, `StochasticGradientDescentParameters`. Specifies table level optimizer. If it's `None` global optimizer in `TPUEmbedding` constructor is used. @@ -127,8 +132,8 @@ def __new__(cls, if learning_rate is not None and learning_rate_fn is not None: raise ValueError('At most one of learning_rate and learning_rate_fn ' - 'can be set; got {} and {}' - .format(learning_rate, learning_rate_fn)) + 'can be set; got {} and {}'.format( + learning_rate, learning_rate_fn)) if optimization_parameters is not None: if not isinstance(optimization_parameters, _OptimizationParameters): @@ -144,15 +149,11 @@ def __new__(cls, class FeatureConfig( - collections.namedtuple( - 'FeatureConfig', - ['table_id', 'max_sequence_length', 'weight_key'])): + collections.namedtuple('FeatureConfig', + ['table_id', 'max_sequence_length', 'weight_key'])): """Feature configuration.""" - def __new__(cls, - table_id, - max_sequence_length=0, - weight_key=None): + def __new__(cls, table_id, max_sequence_length=0, weight_key=None): """Feature configuration. Args: @@ -171,8 +172,8 @@ def __new__(cls, ValueError: if `max_sequence_length` is negative. """ if not isinstance(max_sequence_length, int) or max_sequence_length < 0: - raise ValueError('Invalid max_sequence_length {}.'.format( - max_sequence_length)) + raise ValueError( + 'Invalid max_sequence_length {}.'.format(max_sequence_length)) return super(FeatureConfig, cls).__new__(cls, table_id, max_sequence_length, weight_key) @@ -191,19 +192,19 @@ def __new__(cls, """Data to be enqueued through generate_enqueue_ops(). Args: - embedding_indices: A rank 1 Tensors, indices into the embedding tables. It + embedding_indices: A rank 1 Tensor, indices into the embedding tables. It corresponds to sp_ids.values in embedding_lookup_sparse(). Both int32 and int64 are allowed and will be converted to int32 internally. - sample_indices: A rank 2 Tensors specifying the training example to which + sample_indices: A rank 2 Tensor specifying the training example to which the corresponding embedding_indices and aggregation_weights values belong. It corresponds to sp_ids.indices in embedding_lookup_sparse(). If it is None, we assume each embedding_indices belongs to a different sample. Both int32 and int64 are allowed and will be converted to int32 internally. - aggregation_weights: A rank 1 Tensors containing aggregation weights. - It corresponds to sp_weights.values in embedding_lookup_sparse(). If it - is None, we assume all weights are 1. Both float32 and float64 are - allowed and will be converted to float32 internally. + aggregation_weights: A rank 1 Tensor containing aggregation weights. It + corresponds to sp_weights.values in embedding_lookup_sparse(). If it is + None, we assume all weights are 1. Both float32 and float64 are allowed + and will be converted to float32 internally. Returns: An EnqueueData tuple.
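The `__new__` check above enforces that a table supplies at most one of `learning_rate` and `learning_rate_fn`. A hypothetical, self-contained sketch of that rule; `TableConfigSketch` and `make_table_config` are illustrative stand-ins, not the real TPU embedding classes:

```python
import collections

TableConfigSketch = collections.namedtuple(
    'TableConfigSketch', ['learning_rate', 'learning_rate_fn'])

def make_table_config(learning_rate=None, learning_rate_fn=None):
    # Mirror of the validation above: a static rate and a dynamic rate
    # function are mutually exclusive for a single table.
    if learning_rate is not None and learning_rate_fn is not None:
        raise ValueError('At most one of learning_rate and '
                         'learning_rate_fn can be set; got {} and {}'.format(
                             learning_rate, learning_rate_fn))
    return TableConfigSketch(learning_rate, learning_rate_fn)

print(make_table_config(learning_rate=0.1))                # static rate
print(make_table_config(learning_rate_fn=lambda s: 0.1 / (1 + s)))  # dynamic
```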
@@ -310,11 +311,11 @@ def get_enqueue_datas_list_from_ragged_tensors_list(rg_tensors_list): return enqueue_datas_list -AdamSlotVariableNames = collections.namedtuple( - 'AdamSlotVariableNames', ['m', 'v']) +AdamSlotVariableNames = collections.namedtuple('AdamSlotVariableNames', + ['m', 'v']) -AdagradSlotVariableName = collections.namedtuple( - 'AdagradSlotVariableName', ['accumulator']) +AdagradSlotVariableName = collections.namedtuple('AdagradSlotVariableName', + ['accumulator']) MomentumSlotVariableName = collections.namedtuple('MomentumSlotVariableName', ['momenta']) @@ -325,8 +326,8 @@ def get_enqueue_datas_list_from_ragged_tensors_list(rg_tensors_list): ProximalAdagradSlotVariableName = collections.namedtuple( 'ProximalAdagradSlotVariableName', ['accumulator']) -FtrlSlotVariableName = collections.namedtuple( - 'FtrlSlotVariableName', ['accumulator', 'linear']) +FtrlSlotVariableName = collections.namedtuple('FtrlSlotVariableName', + ['accumulator', 'linear']) ProximalYogiSlotVariableNames = collections.namedtuple( 'ProximalYogiSlotVariableNames', ['v', 'm']) @@ -340,22 +341,21 @@ def get_enqueue_datas_list_from_ragged_tensors_list(rg_tensors_list): RMSPropSlotVariables = collections.namedtuple('RMSPropSlotVariables', ['ms', 'mom']) -AdagradSlotVariable = collections.namedtuple( - 'AdagradSlotVariable', ['accumulator']) +AdagradSlotVariable = collections.namedtuple('AdagradSlotVariable', + ['accumulator']) ProximalAdagradSlotVariable = collections.namedtuple( 'ProximalAdagradSlotVariable', ['accumulator']) -FtrlSlotVariable = collections.namedtuple( - 'FtrlSlotVariable', ['accumulator', 'linear']) +FtrlSlotVariable = collections.namedtuple('FtrlSlotVariable', + ['accumulator', 'linear']) ProximalYogiSlotVariables = collections.namedtuple('ProximalYogiSlotVariables', ['v', 'm']) -VariablesAndOps = collections.namedtuple( - 'VariablesAndOps', - ['embedding_variables_by_table', 'slot_variables_by_table', - 'load_ops', 'retrieve_ops'] +VariablesAndOps = collections.namedtuple('VariablesAndOps',[ + 'embedding_variables_by_table', 'slot_variables_by_table', 'load_ops', + 'retrieve_ops'] ) @@ -424,7 +424,6 @@ def __init__( use_gradient_accumulation: setting this to `False` makes embedding gradients calculation less accurate but faster. Please see `optimization_parameters.proto` for details. - for details. clip_weight_min: the minimum value to clip by; None means -infinity. clip_weight_max: the maximum value to clip by; None means +infinity. weight_decay_factor: amount of weight decay to apply; None means that the @@ -560,19 +559,18 @@ def __init__( Args: learning_rate: a floating point value. The learning rate. - beta1: A float value. - The exponential decay rate for the 1st moment estimates. - beta2: A float value. - The exponential decay rate for the 2nd moment estimates. + beta1: A float value. The exponential decay rate for the 1st moment + estimates. + beta2: A float value. The exponential decay rate for the 2nd moment + estimates. epsilon: A small constant for numerical stability. - lazy_adam: Use lazy Adam instead of Adam. Lazy Adam trains faster. - Please see `optimization_parameters.proto` for details. + lazy_adam: Use lazy Adam instead of Adam. Lazy Adam trains faster. See + `optimization_parameters.proto` for details. sum_inside_sqrt: This improves training speed. Please see `optimization_parameters.proto` for details. use_gradient_accumulation: setting this to `False` makes embedding gradients calculation less accurate but faster. 
Please see `optimization_parameters.proto` for details. - for details. clip_weight_min: the minimum value to clip by; None means -infinity. clip_weight_max: the maximum value to clip by; None means +infinity. weight_decay_factor: amount of weight decay to apply; None means that the @@ -656,19 +654,18 @@ def __init__( Args: learning_rate: a floating point value. The learning rate. learning_rate_power: A float value, must be less or equal to zero. - Controls how the learning rate decreases during training. Use zero for - a fixed learning rate. See section 3.1 in the + Controls how the learning rate decreases during training. Use zero for a + fixed learning rate. See section 3.1 in the [paper](https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf). - initial_accumulator_value: The starting value for accumulators. - Only zero or positive values are allowed. - l1_regularization_strength: A float value, must be greater than or - equal to zero. - l2_regularization_strength: A float value, must be greater than or - equal to zero. + initial_accumulator_value: The starting value for accumulators. Only zero + or positive values are allowed. + l1_regularization_strength: A float value, must be greater than or equal + to zero. + l2_regularization_strength: A float value, must be greater than or equal + to zero. use_gradient_accumulation: setting this to `False` makes embedding gradients calculation less accurate but faster. Please see `optimization_parameters.proto` for details. - for details. clip_weight_min: the minimum value to clip by; None means -infinity. clip_weight_max: the maximum value to clip by; None means +infinity. weight_decay_factor: amount of weight decay to apply; None means that the @@ -728,13 +725,15 @@ class ProximalYogiParameters(_OptimizationParameters): """Optimization parameters for Proximal Yogi with TPU embeddings. Implements the Yogi optimizer as described in - [Adaptive Methods for Nonconvex Optimization](https://papers.nips.cc/paper/8186-adaptive-methods-for-nonconvex-optimization). + [Adaptive Methods for Nonconvex + Optimization](https://papers.nips.cc/paper/8186-adaptive-methods-for-nonconvex-optimization). Pass this to `tf.estimator.tpu.experimental.EmbeddingConfigSpec` via the `optimization_parameters` argument to set the optimizer and its parameters. See the documentation for `tf.estimator.tpu.experimental.EmbeddingConfigSpec` for more details. """ + # pylint: enable=line-too-long def __init__( @@ -1140,6 +1139,7 @@ def __init__(self, cluster_def=None, pipeline_execution_with_tensor_core=False, partition_strategy='div', + profile_data_directory=None, device_config=None, master_job_name=None): """API for using TPU for embedding lookups. @@ -1166,6 +1166,21 @@ def __init__(self, partition_strategy: A string, either 'mod' or 'div', specifying how to map the lookup id to the embedding tensor. For more information see `tf.nn.embedding_lookup_sparse`. + profile_data_directory: Directory where embedding lookup statistics are + stored. These statistics summarize information about the inputs to the + embedding lookup operation, in particular, the average number of + embedding IDs per example and how well the embedding IDs are load + balanced across the system. The lookup statistics are used during TPU + initialization for embedding table partitioning. Collection of lookup + statistics is done at runtime by profiling the embedding inputs: only + 3% of input samples are profiled to minimize host CPU overhead. 
Once + a suitable number of samples are profiled, the lookup statistics are + saved to table-specific files in the profile data directory generally + at the end of a TPU training loop. The filename corresponding to each + table is obtained by hashing table specific parameters (e.g., table + name and number of features) and global configuration parameters (e.g., + sharding strategy and task count). The same profile data directory can + be shared among several models to reuse embedding lookup statistics. device_config: A DeviceConfig instance, used when `master` and `cluster_def` are both `None`. master_job_name: if set, overrides the master job name used to schedule @@ -1179,6 +1194,8 @@ def __init__(self, 'Invalid partition_strategy {}'.format(partition_strategy)) self._partition_strategy = partition_strategy + self._profile_data_directory = profile_data_directory + _validate_table_to_config_dict(table_to_config_dict) # Avoid nondeterminism from `Dict` iteration order by using `OrderedDict`. self._table_to_config_dict = _create_ordered_dict(table_to_config_dict) @@ -1220,14 +1237,14 @@ def __init__(self, self._num_hosts = tpu_system_metadata.num_hosts if master_job_name is None: try: - master_job_name = tpu_system_metadata_lib.master_job(master, - cluster_def) + master_job_name = tpu_system_metadata_lib.master_job( + master, cluster_def) except ValueError as e: raise ValueError(str(e) + ' Please specify a master_job_name.') self._hosts = [] for device in tpu_system_metadata.devices: - if 'device:CPU:' in device.name and ( - master_job_name is None or master_job_name in device.name): + if 'device:CPU:' in device.name and (master_job_name is None or + master_job_name in device.name): self._hosts.append(device.name) self._num_cores_per_host = tpu_system_metadata.num_of_cores_per_host self._num_cores = tpu_system_metadata.num_cores @@ -1244,11 +1261,10 @@ def __init__(self, if optimization_parameters is not None: raise ValueError('`optimization_parameters` should be `None` ' 'for inference mode.') - self._optimization_parameters = ( - StochasticGradientDescentParameters(1.)) + self._optimization_parameters = (StochasticGradientDescentParameters(1.)) else: - raise ValueError('`mode` only supports {} and {}; got {}.' 
- .format(TRAINING, INFERENCE, mode)) + raise ValueError('`mode` only supports {} and {}; got {}.'.format( + TRAINING, INFERENCE, mode)) self._mode = mode # TODO(shizhiw): move `optimization_parameters` into `_optimizer_handler` @@ -1259,11 +1275,13 @@ def __init__(self, self._pipeline_execution_with_tensor_core = ( pipeline_execution_with_tensor_core) - self._learning_rate_fn = list(set( - c.learning_rate_fn for c in self._table_to_config_dict.values() - if c.learning_rate_fn is not None)) + self._learning_rate_fn = list( + set(c.learning_rate_fn + for c in self._table_to_config_dict.values() + if c.learning_rate_fn is not None)) self._learning_rate_fn_to_tag = { - fn: id for id, fn in enumerate(self._learning_rate_fn)} + fn: id for id, fn in enumerate(self._learning_rate_fn) + } self._config_proto = self._create_config_proto() @@ -1403,10 +1421,13 @@ def _create_config_proto(self): elc.TPUEmbeddingConfiguration.MOD) config_proto.pipeline_execution_with_tensor_core = ( self._pipeline_execution_with_tensor_core) + if self._profile_data_directory: + config_proto.profile_data_directory = self._profile_data_directory return config_proto - def create_variables_and_ops(self, embedding_variable_name_by_table=None, + def create_variables_and_ops(self, + embedding_variable_name_by_table=None, slot_variable_names_by_table=None): """Create embedding and slot variables, with ops to load and retrieve them. @@ -1425,8 +1446,8 @@ def create_variables_and_ops(self, embedding_variable_name_by_table=None, Args: embedding_variable_name_by_table: A dictionary mapping from string of - table name to string of embedding variable name. If `None`, - defaults from `get_default_slot_variable_names()` will be used. + table name to string of embedding variable name. If `None`, defaults + from `get_default_slot_variable_names()` will be used. slot_variable_names_by_table: A dictionary mapping from string of table name to `AdamSlotVariableNames`, `AdagradSlotVariableNames` etc. If `None`, defaults from `get_default_slot_variable_names()` will be used. @@ -1510,8 +1531,7 @@ def retrieve_ops(): return retrieve_ops_list return VariablesAndOps(embedding_variables_by_table, - slot_variables_by_table, - load_ops, retrieve_ops) + slot_variables_by_table, load_ops, retrieve_ops) def generate_enqueue_ops( self, @@ -1522,10 +1542,9 @@ def generate_enqueue_ops( """Generate enqueue ops. Args: - enqueue_datas_list: a list of dictionary mapping from string - of feature names to EnqueueData. Each dictionary is for one - TPU core. Dictionaries for the same host should be contiguous - on the list. + enqueue_datas_list: a list of dictionary mapping from string of feature + names to EnqueueData. Each dictionary is for one TPU core. Dictionaries + for the same host should be contiguous in the list. mode_override: A string input that overrides the mode specified in the TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', 'training', 'backward_pass_only'}. 
When set to @@ -1723,8 +1742,8 @@ def _format_for_tpu_embedding_sparse_tensor_batch(self, enqueue_datas): if enqueue_data.sample_indices is not None else int_zeros) kwargs['aggregation_weights'].append( - enqueue_data.aggregation_weights if - enqueue_data.aggregation_weights is not None else float_zeros) + enqueue_data.aggregation_weights + if enqueue_data.aggregation_weights is not None else float_zeros) kwargs['embedding_indices'].append(enqueue_data.embedding_indices) @@ -1763,14 +1782,13 @@ def get_activations(self): feature_index = feature_index + 1 else: activations[feature] = ( - table_activations[:, feature_index:(feature_index+seq_length), :]) + table_activations[:, + feature_index:(feature_index + seq_length), :]) feature_index = feature_index + seq_length return activations - def generate_send_gradients_op(self, - feature_to_gradient_dict, - step=None): + def generate_send_gradients_op(self, feature_to_gradient_dict, step=None): """Send gradient to TPU embedding. Args: @@ -1786,8 +1804,8 @@ def generate_send_gradients_op(self, """ if self._mode != TRAINING: raise RuntimeError('Only in training mode gradients need to ' - 'be sent to TPU embedding; got mode {}.' - .format(self._mode)) + 'be sent to TPU embedding; got mode {}.'.format( + self._mode)) if step is None and self._learning_rate_fn: raise ValueError('There are dynamic learning rates but step is None.') @@ -1808,8 +1826,10 @@ def generate_send_gradients_op(self, return tpu_ops.send_tpu_embedding_gradients( inputs=gradients, - learning_rates=[math_ops.cast(fn(step), dtype=dtypes.float32) - for fn in self._learning_rate_fn], + learning_rates=[ + math_ops.cast(fn(step), dtype=dtypes.float32) + for fn in self._learning_rate_fn + ], config=self.config_proto.SerializeToString()) def _get_optimizer_handler_by_table(self): @@ -1835,21 +1855,21 @@ def _validate_table_to_config_dict(table_to_config_dict): def _validate_feature_to_config_dict(table_to_config_dict, feature_to_config_dict): """Validate `feature_to_config_dict`.""" - used_table_set = set([feature.table_id - for feature in feature_to_config_dict.values()]) + used_table_set = set( + [feature.table_id for feature in feature_to_config_dict.values()]) table_set = set(table_to_config_dict.keys()) unused_table_set = table_set - used_table_set if unused_table_set: - raise ValueError('`table_to_config_dict` specifies table that is not ' - 'used in `feature_to_config_dict`: {}.' - .format(unused_table_set)) + raise ValueError( + '`table_to_config_dict` specifies table that is not ' + 'used in `feature_to_config_dict`: {}.'.format(unused_table_set)) extra_table_set = used_table_set - table_set if extra_table_set: - raise ValueError('`feature_to_config_dict` refers to a table that is not ' - 'specified in `table_to_config_dict`: {}.' - .format(extra_table_set)) + raise ValueError( + '`feature_to_config_dict` refers to a table that is not ' + 'specified in `table_to_config_dict`: {}.'.format(extra_table_set)) def _validate_batch_size(batch_size, num_cores): @@ -1867,10 +1887,9 @@ def _validate_optimization_parameters(optimization_parameters, Args: optimization_parameters: global optimizer provided in `TPUEmbedding` - constructor. + constructor. table_to_config_dict: A dictionary mapping from string of table name to `TableConfig`. 
- """ tbl_optimizer_missing = False for _, table_config in table_to_config_dict.items(): @@ -2107,8 +2126,7 @@ def load_ops_fn(): load_op_list = [] config = config_proto for host_id, table_variable, m_variable, v_variable in (zip( - range(num_hosts), table_variables, - m_variables, v_variables)): + range(num_hosts), table_variables, m_variables, v_variables)): with ops.colocate_with(table_variable): load_parameters_op = ( tpu_ops.load_tpu_embedding_adam_parameters( @@ -2134,8 +2152,7 @@ def retrieve_ops_fn(): retrieve_op_list = [] config = config_proto for host_id, table_variable, m_variable, v_variable in (zip( - range(num_hosts), table_variables, - m_variables, v_variables)): + range(num_hosts), table_variables, m_variables, v_variables)): with ops.colocate_with(table_variable): retrieved_table, retrieved_m, retrieved_v = ( tpu_ops.retrieve_tpu_embedding_adam_parameters( @@ -2174,8 +2191,9 @@ def set_optimization_parameters(self, table_descriptor): def get_default_slot_variable_names(self, table): # These match the default slot variable names created by # tf.train.FtrlOptimizer. - return FtrlSlotVariableName('{}/{}'.format(table, 'Ftrl'), # accumulator - '{}/{}'.format(table, 'Ftrl_1')) # linear + return FtrlSlotVariableName( + '{}/{}'.format(table, 'Ftrl'), # accumulator + '{}/{}'.format(table, 'Ftrl_1')) # linear def create_variables_and_ops(self, table, slot_variable_names, num_hosts, table_config, table_variables, config_proto): @@ -2197,8 +2215,7 @@ def create_variables_and_ops(self, table, slot_variable_names, num_hosts, embedding_dimension=table_config.dimension, collections=[ops.GraphKeys.GLOBAL_VARIABLES], initializer=linear_initializer) - slot_variables = FtrlSlotVariable(accumulator_variables, - linear_variables) + slot_variables = FtrlSlotVariable(accumulator_variables, linear_variables) def load_ops_fn(): """Returns the retrieve ops for Ftrl embedding tables. 
@@ -2539,8 +2556,7 @@ load_ops_fn(): """ load_op_list = [] config = config_proto - for host_id, table_variable in (zip( - range(num_hosts), table_variables)): + for host_id, table_variable in enumerate(table_variables): with ops.colocate_with(table_variable): load_parameters_op = ( tpu_ops.load_tpu_embedding_stochastic_gradient_descent_parameters( @@ -2561,8 +2577,7 @@ def retrieve_ops_fn(): """ retrieve_op_list = [] config = config_proto - for host_id, table_variable in (zip( - range(num_hosts), table_variables)): + for host_id, table_variable in enumerate(table_variables): with ops.colocate_with(table_variable): retrieved_table = ( tpu_ops diff --git a/tensorflow/python/tpu/tpu_test.py b/tensorflow/python/tpu/tpu_test.py index 8d10d9404d8bde..0858741a4ba491 100644 --- a/tensorflow/python/tpu/tpu_test.py +++ b/tensorflow/python/tpu/tpu_test.py @@ -36,6 +36,7 @@ from tensorflow.python.tpu import tpu from tensorflow.python.tpu import tpu_feed from tensorflow.python.tpu import training_loop +from tensorflow.python.tpu.ops import tpu_ops class TPUContextTest(test.TestCase): @@ -169,6 +170,51 @@ def test_prune_unconnected_ops(self): graph.get_operation_by_name("import/y").get_attr( tpu._TPU_REPLICATE_ATTR) + +class TPUOpsTest(test.TestCase): + + def test_all_to_all_zero_split_count(self): + with self.assertRaisesRegex( + ValueError, "split_count 0 must at least be one"): + tpu_ops.all_to_all( + x=[0.0, 0.1652, 0.6543], + group_assignment=[1, -1], + concat_dimension=0, + split_dimension=0, + split_count=0) + + def test_all_to_all_group_assignment_wrong_shape(self): + with self.assertRaisesRegex( + ValueError, "group_assignment must have rank 2"): + tpu_ops.all_to_all( + x=[0.0, 0.1652, 0.6543], + group_assignment=[1, -1], + concat_dimension=0, + split_dimension=0, + split_count=2) + + def test_all_to_all_split_count_not_equal_to_group_assignment_shape(self): + with self.assertRaisesRegex( + ValueError, "split_count 1 must equal the size of the second dimension " "of group_assignment 2"): + tpu_ops.all_to_all( + x=[0.0, 0.1652, 0.6543], + group_assignment=[[0, 1], [2, 3]], + concat_dimension=0, + split_dimension=0, + split_count=1) + + def test_all_to_all_split_count_not_divide_input_shape(self): + with self.assertRaisesRegex( + ValueError, "input dimension 3 not divisible by split_count 2"): + tpu_ops.all_to_all( + x=[[0.0], [0.1652], [0.6543]], + group_assignment=[[0, 1], [2, 3]], + concat_dimension=1, + split_dimension=0, + split_count=2) + + def do_einsum(): a = array_ops.placeholder(dtype=dtypes.float32, name="a", shape=[2, 3, 4]) b = array_ops.placeholder(dtype=dtypes.float32, name="b", shape=[2, 4, 5]) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 096cdd17dcb0f5..1e2cefe88016e1 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -55,7 +55,7 @@ load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") # not contain rc or alpha, only numbers.
# Also update tensorflow/core/public/version.h # and tensorflow/tools/pip_package/setup.py -VERSION = "2.4.0" +VERSION = "2.4.4" VERSION_MAJOR = VERSION.split(".")[0] # Sanitize a dependency so that it works correctly from code that includes diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.experimental.-embedding-config-spec.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.experimental.-embedding-config-spec.pbtxt index 355c57269fd005..ebcf27eea53557 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.experimental.-embedding-config-spec.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.tpu.experimental.-embedding-config-spec.pbtxt @@ -31,6 +31,10 @@ tf_class { name: "pipeline_execution_with_tensor_core" mtype: "" } + member { + name: "profile_data_directory" + mtype: "" + } member { name: "table_to_config_dict" mtype: "" diff --git a/tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16_cuda11.0 b/tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16_cuda11.0 new file mode 100644 index 00000000000000..cf11068475bff1 --- /dev/null +++ b/tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16_cuda11.0 @@ -0,0 +1,80 @@ +# Dockerfile for Ubuntu 16.04 manylinux2010 custom ops with GPU. + +FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu16.04 as devtoolset + +LABEL maintainer="Amit Patankar " + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + cpio \ + file \ + flex \ + g++ \ + make \ + rpm2cpio \ + unar \ + wget \ + && \ + rm -rf /var/lib/apt/lists/* + +ADD devtoolset/fixlinks.sh fixlinks.sh +ADD devtoolset/build_devtoolset.sh build_devtoolset.sh +ADD devtoolset/rpm-patch.sh rpm-patch.sh + +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-7 in /dt7. +RUN /build_devtoolset.sh devtoolset-7 /dt7 +# Set up a sysroot for glibc 2.12 / libstdc++ 4.4 / devtoolset-8 in /dt8. +RUN /build_devtoolset.sh devtoolset-8 /dt8 + +# TODO(klimek): Split up into two different docker images. +FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu16.04 + +LABEL maintainer="Amit Patankar " + +COPY --from=devtoolset /dt7 /dt7 +COPY --from=devtoolset /dt8 /dt8 + +# Install TensorRT. +RUN apt-get update && apt-get install -y \ + libnvinfer-dev=7.1.3-1+cuda11.0 \ + libnvinfer7=7.1.3-1+cuda11.0 \ + libnvinfer-plugin-dev=7.1.3-1+cuda11.0 \ + libnvinfer-plugin7=7.1.3-1+cuda11.0 \ + && \ + rm -rf /var/lib/apt/lists/* + +# Copy and run the install scripts. +COPY install/*.sh /install/ +ARG DEBIAN_FRONTEND=noninteractive +RUN /install/install_bootstrap_deb_packages.sh +RUN /install/install_deb_packages.sh +RUN /install/install_clang.sh +RUN /install/install_bazel.sh +RUN /install/install_buildifier.sh + +ENV TF_NEED_CUDA=1 + +# Install python 3.6. 
+RUN add-apt-repository ppa:deadsnakes/ppa && \ + apt-get update && apt-get install -y \ + python3.6 python3.6-dev python3-pip python3.6-venv && \ + rm -rf /var/lib/apt/lists/* && \ + python3.6 -m pip install pip --upgrade && \ + update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6 0 + +# Install python 3.7 +RUN /install/install_python37.sh + +# Install pip3.5 +RUN wget https://bootstrap.pypa.io/get-pip.py && python3.5 get-pip.py && rm get-pip.py + +RUN /install/install_pip_packages.sh +RUN /install/install_auditwheel.sh + +# Make python3.6 the default python version +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.6 0 + +# Install given tensorflow or tf-nightly version, if not specified, install the # latest official release +ARG TF_PACKAGE=tensorflow +ARG TF_PACKAGE_VERSION= +RUN pip3 install ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} diff --git a/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh b/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh index e5f02062942028..2ff5601af7208f 100755 --- a/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh +++ b/tensorflow/tools/ci_build/builds/docker_cpu_pip.sh @@ -22,16 +22,16 @@ pip --version pip install portpicker pip install *.whl -# Make bazel version the same as the env that invokes this script -rm -rf ~/bazel -mkdir ~/bazel -pushd ~/bazel -wget https://github.com/bazelbuild/bazel/releases/download/"${BAZEL_VERSION}"/bazel-"${BAZEL_VERSION}"-installer-linux-x86_64.sh -chmod +x bazel-*.sh -./bazel-"${BAZEL_VERSION}"-installer-linux-x86_64.sh --user -rm bazel-"${BAZEL_VERSION}"-installer-linux-x86_64.sh -PATH="/bazel_pip/bin:$PATH" -popd +# Install bazelisk +rm -rf ~/bin/bazel +mkdir -p ~/bin +wget --no-verbose -O ~/bin/bazel \ + "https://github.com/bazelbuild/bazelisk/releases/download/v1.3.0/bazelisk-linux-amd64" +chmod u+x ~/bin/bazel +if [[ ":$PATH:" != *":$HOME/bin:"* ]]; then + PATH="$HOME/bin:$PATH" +fi +which bazel bazel version # Use default configuration diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_py36_nonpip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py36_nonpip.sh index dd7e2a567119b2..3f36d2c32204af 100644 --- a/tensorflow/tools/ci_build/rel/macos/cpu_py36_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/macos/cpu_py36_nonpip.sh @@ -27,7 +27,7 @@ python3.6 -m virtualenv tf_build_env --system-site-packages source tf_build_env/bin/activate # Install macos pip dependencies -install_macos_pip_deps sudo pip3.6 +install_macos_pip_deps virtualenv # Run configure. export TF_NEED_CUDA=0 diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_py37_nonpip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py37_nonpip.sh index 7bb1e37a59448b..10b36221c5c98b 100644 --- a/tensorflow/tools/ci_build/rel/macos/cpu_py37_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/macos/cpu_py37_nonpip.sh @@ -27,7 +27,7 @@ python -m virtualenv tf_build_env --system-site-packages source tf_build_env/bin/activate # Install macos pip dependencies -install_macos_pip_deps sudo pip3.7 +install_macos_pip_deps virtualenv # Run configure.
export TF_NEED_CUDA=0 diff --git a/tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh b/tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh index 11b557adc96d44..8857f0abcbe400 100644 --- a/tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh +++ b/tensorflow/tools/ci_build/rel/macos/cpu_py38_nonpip.sh @@ -27,7 +27,7 @@ python -m virtualenv tf_build_env --system-site-packages source tf_build_env/bin/activate # Install macos pip dependencies -install_macos_pip_deps sudo pip3.8 +install_macos_pip_deps virtualenv # Run configure. export TF_NEED_CUDA=0 diff --git a/tensorflow/tools/ci_build/release/common.sh b/tensorflow/tools/ci_build/release/common.sh index e566e470227a4b..b3049503bb3711 100644 --- a/tensorflow/tools/ci_build/release/common.sh +++ b/tensorflow/tools/ci_build/release/common.sh @@ -140,8 +140,8 @@ function install_ubuntu_16_pip_deps { "${PIP_CMD}" install --user 'gast == 0.3.3' # Finally, install tensorboard and estimator # Note that here we want the latest version that matches (b/156523241) - "${PIP_CMD}" install --user --upgrade --force-reinstall 'tb-nightly ~= 2.4.0.a' - "${PIP_CMD}" install --user --upgrade --force-reinstall 'tensorflow_estimator ~= 2.3.0' + "${PIP_CMD}" install --user --upgrade 'tb-nightly ~= 2.4.0.a' + "${PIP_CMD}" install --user --upgrade 'tensorflow_estimator ~= 2.3.0' # Test dependencies "${PIP_CMD}" install --user 'grpcio ~= 1.32.0' "${PIP_CMD}" install --user 'portpicker ~= 1.3.1' @@ -196,8 +196,8 @@ function install_macos_pip_deps { ${PIP_CMD} install --user 'gast == 0.3.3' # Finally, install tensorboard and estimator # Note that here we want the latest version that matches (b/156523241) - ${PIP_CMD} install --user --upgrade --force-reinstall 'tb-nightly ~= 2.4.0.a' - ${PIP_CMD} install --user --upgrade --force-reinstall 'tensorflow_estimator ~= 2.3.0' + ${PIP_CMD} install --user --upgrade 'tb-nightly ~= 2.4.0.a' + ${PIP_CMD} install --user --upgrade 'tensorflow_estimator ~= 2.3.0' # Test dependencies ${PIP_CMD} install --user 'grpcio ~= 1.32.0' ${PIP_CMD} install --user 'portpicker ~= 1.3.1' diff --git a/tensorflow/tools/ci_build/release/common_win.bat b/tensorflow/tools/ci_build/release/common_win.bat index f27ec3117ed509..66f762e7cb1aa0 100644 --- a/tensorflow/tools/ci_build/release/common_win.bat +++ b/tensorflow/tools/ci_build/release/common_win.bat @@ -46,8 +46,8 @@ SET PATH=%PATH%;C:\%PYTHON_DIRECTORY% %PY_EXE% -m pip install "gast == 0.3.3" @REM Finally, install tensorboard and estimator @REM Note that here we want the latest version that matches (b/156523241) -%PY_EXE% -m pip install --upgrade --force-reinstall "tb-nightly ~= 2.4.0.a" -%PY_EXE% -m pip install --upgrade --force-reinstall "tensorflow_estimator ~= 2.3.0" +%PY_EXE% -m pip install --upgrade "tb-nightly ~= 2.4.0.a" +%PY_EXE% -m pip install --upgrade "tensorflow_estimator ~= 2.3.0" @REM Test dependencies %PY_EXE% -m pip install "grpcio ~= 1.32.0" %PY_EXE% -m pip install "portpicker ~= 1.3.1" diff --git a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh index e4d8689ec22dc1..4d6ee06dc21f07 100755 --- a/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/windows/libtensorflow_cpu.sh @@ -59,6 +59,7 @@ rm -f ${DIR}/tensorflow_jni.dll # Zip up the .dll, LICENSE and include files for the C library. 
mkdir -p ${DIR}/include/tensorflow/c mkdir -p ${DIR}/include/tensorflow/c/eager +mkdir -p ${DIR}/include/tensorflow/core/platform mkdir -p ${DIR}/lib cp bazel-bin/tensorflow/tensorflow.dll ${DIR}/lib/tensorflow.dll cp bazel-bin/tensorflow/tensorflow.lib ${DIR}/lib/tensorflow.lib @@ -67,8 +68,19 @@ cp tensorflow/c/c_api.h \ tensorflow/c/tf_datatype.h \ tensorflow/c/tf_status.h \ tensorflow/c/tf_tensor.h \ + tensorflow/c/tf_tstring.h \ + tensorflow/c/tf_file_statistics.h \ + tensorflow/c/tensor_interface.h \ + tensorflow/c/c_api_macros.h \ + tensorflow/c/c_api_experimental.h \ ${DIR}/include/tensorflow/c -cp tensorflow/c/eager/c_api.h ${DIR}/include/tensorflow/c/eager +cp tensorflow/c/eager/c_api.h \ + tensorflow/c/eager/c_api_experimental.h \ + tensorflow/c/eager/dlpack.h \ + ${DIR}/include/tensorflow/c/eager +cp tensorflow/core/platform/ctstring.h \ + tensorflow/core/platform/ctstring_internal.h \ + ${DIR}/include/tensorflow/core/platform cp LICENSE ${DIR}/LICENSE cp bazel-bin/tensorflow/tools/lib_package/THIRD_PARTY_TF_C_LICENSES ${DIR}/ cd ${DIR} @@ -76,11 +88,23 @@ zip libtensorflow-cpu-windows-$(uname -m).zip \ lib/tensorflow.dll \ lib/tensorflow.lib \ include/tensorflow/c/eager/c_api.h \ + include/tensorflow/c/eager/c_api_experimental.h \ + include/tensorflow/c/eager/dlpack.h \ include/tensorflow/c/c_api.h \ include/tensorflow/c/tf_attrtype.h \ include/tensorflow/c/tf_datatype.h \ include/tensorflow/c/tf_status.h \ include/tensorflow/c/tf_tensor.h \ + include/tensorflow/c/tf_tstring.h \ + include/tensorflow/c/tf_file_statistics.h \ + include/tensorflow/c/tensor_interface.h \ + include/tensorflow/c/c_api_macros.h \ + include/tensorflow/c/c_api_experimental.h \ + include/tensorflow/core/platform/ctstring.h \ + include/tensorflow/core/platform/ctstring_internal.h \ LICENSE \ THIRD_PARTY_TF_C_LICENSES rm -rf lib include + +cd .. +tar -zcvf windows_cpu_libtensorflow_binaries.tar.gz lib_package \ No newline at end of file diff --git a/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh index 2839b631833319..bf39f8ea126fc7 100644 --- a/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh +++ b/tensorflow/tools/ci_build/windows/libtensorflow_gpu.sh @@ -59,6 +59,7 @@ rm -f ${DIR}/tensorflow_jni.dll # Zip up the .dll, LICENSE and include files for the C library. 
mkdir -p ${DIR}/include/tensorflow/c mkdir -p ${DIR}/include/tensorflow/c/eager +mkdir -p ${DIR}/include/tensorflow/core/platform mkdir -p ${DIR}/lib cp bazel-bin/tensorflow/tensorflow.dll ${DIR}/lib/tensorflow.dll cp bazel-bin/tensorflow/tensorflow.lib ${DIR}/lib/tensorflow.lib @@ -67,8 +68,19 @@ cp tensorflow/c/c_api.h \ tensorflow/c/tf_datatype.h \ tensorflow/c/tf_status.h \ tensorflow/c/tf_tensor.h \ + tensorflow/c/tf_tstring.h \ + tensorflow/c/tf_file_statistics.h \ + tensorflow/c/tensor_interface.h \ + tensorflow/c/c_api_macros.h \ + tensorflow/c/c_api_experimental.h \ ${DIR}/include/tensorflow/c -cp tensorflow/c/eager/c_api.h ${DIR}/include/tensorflow/c/eager +cp tensorflow/c/eager/c_api.h \ + tensorflow/c/eager/c_api_experimental.h \ + tensorflow/c/eager/dlpack.h \ + ${DIR}/include/tensorflow/c/eager +cp tensorflow/core/platform/ctstring.h \ + tensorflow/core/platform/ctstring_internal.h \ + ${DIR}/include/tensorflow/core/platform cp LICENSE ${DIR}/LICENSE cp bazel-bin/tensorflow/tools/lib_package/THIRD_PARTY_TF_C_LICENSES ${DIR}/ cd ${DIR} @@ -76,11 +88,24 @@ zip libtensorflow-gpu-windows-$(uname -m).zip \ lib/tensorflow.dll \ lib/tensorflow.lib \ include/tensorflow/c/eager/c_api.h \ + include/tensorflow/c/eager/c_api_experimental.h \ + include/tensorflow/c/eager/dlpack.h \ include/tensorflow/c/c_api.h \ include/tensorflow/c/tf_attrtype.h \ include/tensorflow/c/tf_datatype.h \ include/tensorflow/c/tf_status.h \ include/tensorflow/c/tf_tensor.h \ + include/tensorflow/c/tf_tstring.h \ + include/tensorflow/c/tf_file_statistics.h \ + include/tensorflow/c/tensor_interface.h \ + include/tensorflow/c/c_api_macros.h \ + include/tensorflow/c/c_api_experimental.h \ + include/tensorflow/core/platform/ctstring.h \ + include/tensorflow/core/platform/ctstring_internal.h \ LICENSE \ THIRD_PARTY_TF_C_LICENSES rm -rf lib include + +cd .. 
+ +tar -zcvf windows_gpu_libtensorflow_binaries.tar.gz lib_package \ No newline at end of file diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpi-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpi-horovod-jupyter.Dockerfile index f147acbd0dbc1c..a11af9ebf1e6e5 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpi-horovod-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpi-horovod-jupyter.Dockerfile @@ -85,6 +85,7 @@ ARG HOROVOD_VERSION= RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + cmake \ g++-8 \ gcc-8 \ python3-dev diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpi-horovod.Dockerfile index b6e0d0244867ab..20ec6458ef45f4 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpi-horovod.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpi-horovod.Dockerfile @@ -85,6 +85,7 @@ ARG HOROVOD_VERSION= RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + cmake \ g++-8 \ gcc-8 \ python3-dev diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpich-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpich-horovod-jupyter.Dockerfile index b2fc1c9e03909e..162d0a938fb034 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpich-horovod-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpich-horovod-jupyter.Dockerfile @@ -81,6 +81,7 @@ ARG HOROVOD_VERSION= RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + cmake \ g++-8 \ gcc-8 \ python3-dev diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpich-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpich-horovod.Dockerfile index 8043109352e91b..c36de1475dd5f0 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpich-horovod.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-18.04-mpich-horovod.Dockerfile @@ -81,6 +81,7 @@ ARG HOROVOD_VERSION= RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + cmake \ g++-8 \ gcc-8 \ python3-dev diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpi-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpi-horovod-jupyter.Dockerfile index 7830420f496ab2..226486577287d6 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpi-horovod-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpi-horovod-jupyter.Dockerfile @@ -95,6 +95,7 @@ ARG HOROVOD_VERSION= RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + cmake \ g++-8 \ gcc-8 \ ${PYTHON}-dev diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpi-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpi-horovod.Dockerfile index a5084ee7182d53..479a4d7cf41876 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpi-horovod.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpi-horovod.Dockerfile @@ -95,6 +95,7 @@ ARG 
HOROVOD_VERSION= RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + cmake \ g++-8 \ gcc-8 \ ${PYTHON}-dev diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpich-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpich-horovod-jupyter.Dockerfile index a35aa122ced3d1..b328010b0a4964 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpich-horovod-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpich-horovod-jupyter.Dockerfile @@ -91,6 +91,7 @@ ARG HOROVOD_VERSION= RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + cmake \ g++-8 \ gcc-8 \ ${PYTHON}-dev diff --git a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpich-horovod.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpich-horovod.Dockerfile index 2090352df6287e..5648c1ac582893 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpich-horovod.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/onednn/ubuntu-20.04-mpich-horovod.Dockerfile @@ -91,6 +91,7 @@ ARG HOROVOD_VERSION= RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + cmake \ g++-8 \ gcc-8 \ ${PYTHON}-dev diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/2004-horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/2004-horovod.partial.Dockerfile index f018c3a2fc5aa2..c19b891e033b1d 100644 --- a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/2004-horovod.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/2004-horovod.partial.Dockerfile @@ -6,6 +6,7 @@ ARG HOROVOD_VERSION= RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + cmake \ g++-8 \ gcc-8 \ ${PYTHON}-dev diff --git a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/horovod.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/horovod.partial.Dockerfile index 63c1e13443c419..abc750f24421c5 100644 --- a/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/horovod.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/onednn/ubuntu/horovod.partial.Dockerfile @@ -6,6 +6,7 @@ ARG HOROVOD_VERSION= RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \ build-essential \ + cmake \ g++-8 \ gcc-8 \ python3-dev diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml index 421f8c56bd1b9b..5043c8ec7c618b 100644 --- a/tensorflow/tools/dockerfiles/spec.yml +++ b/tensorflow/tools/dockerfiles/spec.yml @@ -421,6 +421,7 @@ slice_sets: - TF_PACKAGE=intel-tensorflow - UBUNTU_VERSION=20.04 - PYTHON=python3.7 + - DEBIAN_FRONTEND="noninteractive" ubuntu-devel-onednn: - add_to_name: "-16.04-devel" diff --git a/tensorflow/tools/dockerfiles/tools.Dockerfile b/tensorflow/tools/dockerfiles/tools.Dockerfile index a96b2578cba757..8ae6814c91b65e 100644 --- a/tensorflow/tools/dockerfiles/tools.Dockerfile +++ b/tensorflow/tools/dockerfiles/tools.Dockerfile @@ -22,7 +22,7 @@ LABEL maintainer="Austin Anderson " RUN apt-get update && apt-get install -y python3 python3-pip bash curl RUN curl -sSL https://get.docker.com/ | sh -RUN pip3 install --upgrade pip setuptools pyyaml absl-py cerberus docker +RUN pip3 install --upgrade pip setuptools pyyaml absl-py cerberus 'docker<=4.3.0' WORKDIR /tf VOLUME ["/tf"] diff --git 
a/tensorflow/tools/pip_package/build_pip_package.sh b/tensorflow/tools/pip_package/build_pip_package.sh index 7a070938045bd5..8b2772b5072041 100755 --- a/tensorflow/tools/pip_package/build_pip_package.sh +++ b/tensorflow/tools/pip_package/build_pip_package.sh @@ -132,10 +132,10 @@ function prepare_src() { unzip -o -q ./bazel-bin/tensorflow/tools/pip_package/simple_console_for_windows.zip -d ./bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip echo "Unzip finished." # runfiles structure after unzip the python binary - cp \ + cp -L \ bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles/org_tensorflow/LICENSE \ "${TMPDIR}" - cp -R \ + cp -LR \ bazel-bin/tensorflow/tools/pip_package/simple_console_for_window_unzip/runfiles/org_tensorflow/tensorflow \ "${TMPDIR}" cp_external \ @@ -149,10 +149,10 @@ function prepare_src() { RUNFILES=bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow if [ -d bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/external ]; then # Old-style runfiles structure (--legacy_external_runfiles). - cp \ + cp -L \ bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/LICENSE \ "${TMPDIR}" - cp -R \ + cp -LR \ bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/tensorflow \ "${TMPDIR}" cp_external \ @@ -172,10 +172,10 @@ function prepare_src() { fi else # New-style runfiles structure (--nolegacy_external_runfiles). - cp \ + cp -L \ bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/LICENSE \ "${TMPDIR}" - cp -R \ + cp -LR \ bazel-bin/tensorflow/tools/pip_package/build_pip_package.runfiles/org_tensorflow/tensorflow \ "${TMPDIR}" cp_external \ diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 65133afdafeab2..485ca1bbd8f7fa 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -50,7 +50,7 @@ # result for pip. 
# Also update tensorflow/tensorflow.bzl and # tensorflow/core/public/version.h -_VERSION = '2.4.0' +_VERSION = '2.4.4' # We use the same setup.py for all tensorflow_* packages and for the nightly @@ -97,7 +97,7 @@ # They are updated during the release process # When updating these, please also update the nightly versions below 'tensorboard ~= 2.4', - 'tensorflow_estimator >= 2.4.0rc0 , < 2.5.0', + 'tensorflow_estimator >= 2.4.0 , < 2.5.0', ] diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 3397d1070d1c97..58b26e3cb33ebe 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -362,12 +362,12 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "org_sqlite", build_file = clean_dep("//third_party:sqlite.BUILD"), - sha256 = "b34f4c0c0eefad9a7e515c030c18702e477f4ef7d8ade6142bdab8011b487ac6", - strip_prefix = "sqlite-amalgamation-3330000", + sha256 = "8ff0b79fd9118af7a760f1f6a98cac3e69daed325c8f9f0a581ecb62f797fd64", + strip_prefix = "sqlite-amalgamation-3340000", system_build_file = clean_dep("//third_party/systemlibs:sqlite.BUILD"), urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/www.sqlite.org/2020/sqlite-amalgamation-3330000.zip", - "https://www.sqlite.org/2020/sqlite-amalgamation-3330000.zip", + "https://storage.googleapis.com/mirror.tensorflow.org/www.sqlite.org/2020/sqlite-amalgamation-3340000.zip", + "https://www.sqlite.org/2020/sqlite-amalgamation-3340000.zip", ], ) @@ -638,12 +638,12 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "curl", build_file = clean_dep("//third_party:curl.BUILD"), - sha256 = "01ae0c123dee45b01bbaef94c0bc00ed2aec89cb2ee0fd598e0d302a6b5e0a98", - strip_prefix = "curl-7.69.1", + sha256 = "ed936c0b02c06d42cf84b39dd12bb14b62d77c7c4e875ade022280df5dcc81d7", + strip_prefix = "curl-7.78.0", system_build_file = clean_dep("//third_party/systemlibs:curl.BUILD"), urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/curl.haxx.se/download/curl-7.69.1.tar.gz", - "https://curl.haxx.se/download/curl-7.69.1.tar.gz", + "https://storage.googleapis.com/mirror.tensorflow.org/curl.haxx.se/download/curl-7.78.0.tar.gz", + "https://curl.haxx.se/download/curl-7.78.0.tar.gz", ], ) @@ -726,12 +726,12 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "jsoncpp_git", build_file = clean_dep("//third_party:jsoncpp.BUILD"), - sha256 = "77a402fb577b2e0e5d0bdc1cf9c65278915cdb25171e3452c68b6da8a561f8f0", - strip_prefix = "jsoncpp-1.9.2", + sha256 = "e34a628a8142643b976c7233ef381457efad79468c67cb1ae0b83a33d7493999", + strip_prefix = "jsoncpp-1.9.4", system_build_file = clean_dep("//third_party/systemlibs:jsoncpp.BUILD"), urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/open-source-parsers/jsoncpp/archive/1.9.2.tar.gz", - "https://github.com/open-source-parsers/jsoncpp/archive/1.9.2.tar.gz", + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/open-source-parsers/jsoncpp/archive/1.9.4.tar.gz", + "https://github.com/open-source-parsers/jsoncpp/archive/1.9.4.tar.gz", ], ) diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD index 62fc946956da85..85b09cf1bf4c72 100644 --- a/third_party/curl.BUILD +++ b/third_party/curl.BUILD @@ -25,22 +25,35 @@ CURL_WIN_SRCS = [ "lib/asyn-thread.c", "lib/inet_ntop.c", "lib/system_win32.c", - "lib/x509asn1.c", - "lib/vtls/schannel.c", - "lib/vtls/schannel_verify.c", - "lib/idn_win32.c", + "lib/setup-win32.h", ] cc_library( name = "curl", srcs = [ 
"include/curl_config.h", + "lib/altsvc.c", + "lib/altsvc.h", + "lib/amigaos.c", "lib/amigaos.h", "lib/arpa_telnet.h", - "lib/asyn.h", "lib/asyn-ares.c", + "lib/asyn.h", "lib/base64.c", + "lib/bufref.c", + "lib/bufref.h", + "lib/c-hyper.c", + "lib/c-hyper.h", + "lib/config-amigaos.h", + "lib/config-dos.h", + "lib/config-mac.h", + "lib/config-os400.h", + "lib/config-plan9.h", + "lib/config-riscos.h", + "lib/config-tpf.h", + "lib/config-vxworks.h", "lib/config-win32.h", + "lib/config-win32ce.h", "lib/conncache.c", "lib/conncache.h", "lib/connect.c", @@ -54,14 +67,20 @@ cc_library( "lib/curl_base64.h", "lib/curl_ctype.c", "lib/curl_ctype.h", + "lib/curl_des.c", "lib/curl_des.h", + "lib/curl_endian.c", "lib/curl_endian.h", "lib/curl_fnmatch.c", "lib/curl_fnmatch.h", + "lib/curl_get_line.c", + "lib/curl_get_line.h", "lib/curl_gethostname.c", "lib/curl_gethostname.h", + "lib/curl_gssapi.c", "lib/curl_gssapi.h", "lib/curl_hmac.h", + "lib/curl_krb5.h", "lib/curl_ldap.h", "lib/curl_md4.h", "lib/curl_md5.h", @@ -70,14 +89,19 @@ cc_library( "lib/curl_memrchr.h", "lib/curl_multibyte.c", "lib/curl_multibyte.h", + "lib/curl_ntlm_core.c", "lib/curl_ntlm_core.h", + "lib/curl_ntlm_wb.c", "lib/curl_ntlm_wb.h", + "lib/curl_path.c", + "lib/curl_path.h", "lib/curl_printf.h", + "lib/curl_range.c", + "lib/curl_range.h", "lib/curl_rtmp.c", "lib/curl_rtmp.h", "lib/curl_sasl.c", "lib/curl_sasl.h", - "lib/curl_sec.h", "lib/curl_setup.h", "lib/curl_setup_once.h", "lib/curl_sha256.h", @@ -86,23 +110,35 @@ cc_library( "lib/curl_threads.c", "lib/curl_threads.h", "lib/curlx.h", + "lib/dict.c", "lib/dict.h", + "lib/doh.c", + "lib/doh.h", "lib/dotdot.c", "lib/dotdot.h", + "lib/dynbuf.c", + "lib/dynbuf.h", "lib/easy.c", + "lib/easygetopt.c", "lib/easyif.h", + "lib/easyoptions.c", + "lib/easyoptions.h", "lib/escape.c", "lib/escape.h", + "lib/file.c", "lib/file.h", "lib/fileinfo.c", "lib/fileinfo.h", "lib/formdata.c", "lib/formdata.h", + "lib/ftp.c", "lib/ftp.h", + "lib/ftplistparser.c", "lib/ftplistparser.h", "lib/getenv.c", "lib/getinfo.c", "lib/getinfo.h", + "lib/gopher.c", "lib/gopher.h", "lib/hash.c", "lib/hash.h", @@ -115,6 +151,8 @@ cc_library( "lib/hostip4.c", "lib/hostip6.c", "lib/hostsyn.c", + "lib/hsts.c", + "lib/hsts.h", "lib/http.c", "lib/http.h", "lib/http2.c", @@ -123,17 +161,24 @@ cc_library( "lib/http_chunks.h", "lib/http_digest.c", "lib/http_digest.h", + "lib/http_negotiate.c", "lib/http_negotiate.h", + "lib/http_ntlm.c", "lib/http_ntlm.h", "lib/http_proxy.c", "lib/http_proxy.h", + "lib/http_aws_sigv4.c", + "lib/http_aws_sigv4.h", + "lib/idn_win32.c", "lib/if2ip.c", "lib/if2ip.h", + "lib/imap.c", "lib/imap.h", "lib/inet_ntop.h", "lib/inet_pton.c", "lib/inet_pton.h", "lib/krb5.c", + "lib/ldap.c", "lib/llist.c", "lib/llist.h", "lib/md4.c", @@ -143,38 +188,43 @@ cc_library( "lib/mime.c", "lib/mime.h", "lib/mprintf.c", + "lib/mqtt.c", + "lib/mqtt.h", "lib/multi.c", "lib/multihandle.h", "lib/multiif.h", "lib/netrc.c", "lib/netrc.h", + "lib/non-ascii.c", "lib/non-ascii.h", "lib/nonblock.c", "lib/nonblock.h", - "lib/nwlib.c", - "lib/nwos.c", + #"lib/nwlib.c", + #"lib/nwos.c", + "lib/openldap.c", "lib/parsedate.c", "lib/parsedate.h", - "lib/pingpong.h", "lib/pingpong.c", + "lib/pingpong.h", + "lib/pop3.c", "lib/pop3.h", "lib/progress.c", "lib/progress.h", + "lib/psl.c", + "lib/psl.h", "lib/quic.h", "lib/rand.c", "lib/rand.h", - "lib/rename.h", "lib/rename.c", + "lib/rename.h", "lib/rtsp.c", "lib/rtsp.h", - "lib/security.c", "lib/select.c", "lib/select.h", "lib/sendf.c", "lib/sendf.h", "lib/setopt.c", "lib/setopt.h", 
- "lib/setup-os400.h", "lib/setup-vms.h", "lib/sha256.c", "lib/share.c", @@ -182,13 +232,17 @@ cc_library( "lib/sigpipe.h", "lib/slist.c", "lib/slist.h", + "lib/smb.c", "lib/smb.h", + "lib/smtp.c", "lib/smtp.h", "lib/sockaddr.h", - "lib/socketpair.h", "lib/socketpair.c", + "lib/socketpair.h", "lib/socks.c", "lib/socks.h", + "lib/socks_gssapi.c", + "lib/socks_sspi.c", "lib/speedcheck.c", "lib/speedcheck.h", "lib/splay.c", @@ -204,7 +258,9 @@ cc_library( "lib/strtoofft.c", "lib/strtoofft.h", "lib/system_win32.h", + "lib/telnet.c", "lib/telnet.h", + "lib/tftp.c", "lib/tftp.h", "lib/timeval.c", "lib/timeval.h", @@ -213,44 +269,69 @@ cc_library( "lib/url.c", "lib/url.h", "lib/urldata.h", + "lib/urlapi-int.h", + "lib/urlapi.c", + "lib/version.c", + "lib/version_win32.c", + "lib/version_win32.h", + "lib/warnless.c", + "lib/warnless.h", + "lib/wildcard.c", + "lib/wildcard.h", + "lib/x509asn1.c", + "lib/x509asn1.h", "lib/vauth/cleartext.c", "lib/vauth/cram.c", "lib/vauth/digest.c", "lib/vauth/digest.h", + "lib/vauth/digest_sspi.c", + "lib/vauth/krb5_gssapi.c", + "lib/vauth/krb5_sspi.c", + "lib/vauth/ntlm.c", "lib/vauth/ntlm.h", + "lib/vauth/ntlm_sspi.c", "lib/vauth/oauth2.c", + "lib/vauth/spnego_sspi.c", "lib/vauth/vauth.c", "lib/vauth/vauth.h", - "lib/version.c", + "lib/vquic/ngtcp2.c", + "lib/vquic/ngtcp2.h", + "lib/vquic/quiche.c", + "lib/vquic/quiche.h", + "lib/vquic/vquic.c", + "lib/vquic/vquic.h", + "lib/vssh/libssh.c", + "lib/vssh/libssh2.c", "lib/vssh/ssh.h", + "lib/vssh/wolfssh.c", + "lib/vtls/bearssl.c", "lib/vtls/bearssl.h", + "lib/vtls/gskit.c", "lib/vtls/gskit.h", + "lib/vtls/gtls.c", "lib/vtls/gtls.h", + "lib/vtls/keylog.c", + "lib/vtls/keylog.h", + "lib/vtls/mbedtls.c", "lib/vtls/mbedtls.h", + "lib/vtls/mbedtls_threadlock.c", + "lib/vtls/mbedtls_threadlock.h", + "lib/vtls/mesalink.c", + "lib/vtls/mesalink.h", + "lib/vtls/nss.c", "lib/vtls/nssg.h", + "lib/vtls/openssl.c", "lib/vtls/openssl.h", + "lib/vtls/rustls.c", + "lib/vtls/rustls.h", + "lib/vtls/schannel.c", "lib/vtls/schannel.h", + "lib/vtls/schannel_verify.c", + "lib/vtls/sectransp.h", "lib/vtls/vtls.c", "lib/vtls/vtls.h", + "lib/vtls/wolfssl.c", "lib/vtls/wolfssl.h", - "lib/warnless.c", - "lib/warnless.h", - "lib/wildcard.c", - "lib/wildcard.h", - "lib/x509asn1.h", - "lib/psl.h", - "lib/psl.c", - "lib/vtls/sectransp.h", - "lib/vtls/mesalink.h", - "lib/vtls/mesalink.c", - "lib/curl_get_line.h", - "lib/curl_get_line.c", - "lib/urlapi-int.h", - "lib/urlapi.c", - "lib/altsvc.h", - "lib/altsvc.c", - "lib/doh.h", - "lib/doh.c", ] + select({ "@org_tensorflow//tensorflow:macos": [ "lib/vtls/sectransp.c", @@ -260,7 +341,6 @@ cc_library( ], "@org_tensorflow//tensorflow:windows": CURL_WIN_SRCS, "//conditions:default": [ - "lib/vtls/openssl.c", ], }), hdrs = [ @@ -269,6 +349,7 @@ cc_library( "include/curl/easy.h", "include/curl/mprintf.h", "include/curl/multi.h", + "include/curl/options.h", "include/curl/stdcheaders.h", "include/curl/system.h", "include/curl/typecheck-gcc.h", @@ -372,6 +453,8 @@ cc_binary( "src/tool_doswin.h", "src/tool_easysrc.c", "src/tool_easysrc.h", + "src/tool_filetime.c", + "src/tool_filetime.h", "src/tool_formparse.c", "src/tool_formparse.h", "src/tool_getparam.c", @@ -406,6 +489,8 @@ cc_binary( "src/tool_paramhlp.h", "src/tool_parsecfg.c", "src/tool_parsecfg.h", + "src/tool_progress.c", + "src/tool_progress.h", "src/tool_sdecls.h", "src/tool_setopt.c", "src/tool_setopt.h", @@ -425,6 +510,8 @@ cc_binary( "src/tool_writeenv.h", "src/tool_writeout.c", "src/tool_writeout.h", + "src/tool_writeout_json.c", + 
"src/tool_writeout_json.h", "src/tool_xattr.c", "src/tool_xattr.h", ], diff --git a/third_party/jsoncpp.BUILD b/third_party/jsoncpp.BUILD index 7bc466c664f71e..3b4642c81098c7 100644 --- a/third_party/jsoncpp.BUILD +++ b/third_party/jsoncpp.BUILD @@ -13,7 +13,6 @@ cc_library( ], hdrs = [ "include/json/allocator.h", - "include/json/autolink.h", "include/json/config.h", "include/json/forwards.h", "include/json/json.h",