diff --git a/.zenodo.json b/.zenodo.json
new file mode 100644
index 00000000000000..7161180c51ae3e
--- /dev/null
+++ b/.zenodo.json
@@ -0,0 +1,13 @@
+{
+ "description": "TensorFlow is an end-to-end open source platform for machine learning. It has a comprehensive, flexible ecosystem of tools, libraries, and community resources that lets researchers push the state-of-the-art in ML and developers easily build and deploy ML-powered applications.",
+ "license": "Apache-2.0",
+ "title": "TensorFlow",
+ "upload_type": "software",
+ "creators": [
+ {
+ "name": "TensorFlow Developers"
+ }
+ ],
+ "access_right": "open",
+ "notes": "Specific TensorFlow versions can be found in the \"Versions\" list on the right side of this page.
See the full list of authors on GitHub."
+}
diff --git a/RELEASE.md b/RELEASE.md
index 3468c459f4242b..c44de3a7286733 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,3 +1,198 @@
+# Release 2.1.4
+
+This release introduces several vulnerability fixes:
+
+ * Fixes a heap buffer overflow in `RaggedBinCount` ([CVE-2021-29512](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29512))
+ * Fixes a heap out of bounds write in `RaggedBinCount` ([CVE-2021-29514](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29514))
+ * Fixes a type confusion during tensor casts which leads to dereferencing null pointers ([CVE-2021-29513](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29513))
+ * Fixes a reference binding to null pointer in `MatrixDiag*` ops ([CVE-2021-29515](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29515))
+ * Fixes a null pointer dereference via invalid Ragged Tensors ([CVE-2021-29516](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29516))
+ * Fixes a division by zero in `Conv3D` ([CVE-2021-29517](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29517))
+ * Fixes vulnerabilities where session operations in eager mode lead to null pointer dereferences ([CVE-2021-29518](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29518))
+ * Fixes a `CHECK`-fail in `SparseCross` caused by type confusion ([CVE-2021-29519](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29519))
+ * Fixes a segfault in `SparseCountSparseOutput` ([CVE-2021-29521](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29521))
+ * Fixes a heap buffer overflow in `Conv3DBackprop*` ([CVE-2021-29520](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29520))
+ * Fixes a division by 0 in `Conv3DBackprop*` ([CVE-2021-29522](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29522))
+ * Fixes a `CHECK`-fail in `AddManySparseToTensorsMap` ([CVE-2021-29523](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29523))
+ * Fixes a division by 0 in `Conv2DBackpropFilter` ([CVE-2021-29524](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29524))
+ * Fixes a division by 0 in `Conv2DBackpropInput` ([CVE-2021-29525](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29525))
+ * Fixes a division by 0 in `Conv2D` ([CVE-2021-29526](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29526))
+ * Fixes a division by 0 in `QuantizedConv2D` ([CVE-2021-29527](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29527))
+ * Fixes a division by 0 in `QuantizedMul` ([CVE-2021-29528](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29528))
+ * Fixes vulnerabilities caused by invalid validation in `SparseMatrixSparseCholesky` ([CVE-2021-29530](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29530))
+ * Fixes a heap buffer overflow caused by rounding ([CVE-2021-29529](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29529))
+ * Fixes a `CHECK`-fail in `tf.raw_ops.EncodePng` ([CVE-2021-29531](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29531))
+ * Fixes a heap out of bounds read in `RaggedCross` ([CVE-2021-29532](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29532))
+ * Fixes a `CHECK`-fail in `DrawBoundingBoxes` ([CVE-2021-29533](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29533))
+ * Fixes a heap buffer overflow in `QuantizedMul` ([CVE-2021-29535](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29535))
+ * Fixes a `CHECK`-fail in `SparseConcat` ([CVE-2021-29534](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29534))
+ * Fixes a heap buffer overflow in `QuantizedResizeBilinear` ([CVE-2021-29537](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29537))
+ * Fixes a heap buffer overflow in `QuantizedReshape` ([CVE-2021-29536](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29536))
+ * Fixes a division by zero in `Conv2DBackpropFilter` ([CVE-2021-29538](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29538))
+ * Fixes a heap buffer overflow in `Conv2DBackpropFilter` ([CVE-2021-29540](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29540))
+ * Fixes a heap buffer overflow in `StringNGrams` ([CVE-2021-29542](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29542))
+ * Fixes a null pointer dereference in `StringNGrams` ([CVE-2021-29541](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29541))
+ * Fixes a `CHECK`-fail in `QuantizeAndDequantizeV4Grad` ([CVE-2021-29544](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29544))
+ * Fixes a `CHECK`-fail in `CTCGreedyDecoder` ([CVE-2021-29543](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29543))
+ * Fixes a heap buffer overflow in `SparseTensorToCSRSparseMatrix` ([CVE-2021-29545](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29545))
+ * Fixes a division by 0 in `QuantizedBiasAdd` ([CVE-2021-29546](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29546))
+ * Fixes a heap out of bounds in `QuantizedBatchNormWithGlobalNormalization` ([CVE-2021-29547](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29547))
+ * Fixes a division by 0 in `QuantizedBatchNormWithGlobalNormalization` ([CVE-2021-29548](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29548))
+ * Fixes a division by 0 in `QuantizedAdd` ([CVE-2021-29549](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29549))
+ * Fixes a division by 0 in `FractionalAvgPool` ([CVE-2021-29550](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29550))
+ * Fixes an OOB read in `MatrixTriangularSolve` ([CVE-2021-29551](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29551))
+ * Fixes a heap OOB in `QuantizeAndDequantizeV3` ([CVE-2021-29553](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29553))
+ * Fixes a `CHECK`-failure in `UnsortedSegmentJoin` ([CVE-2021-29552](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29552))
+ * Fixes a division by 0 in `DenseCountSparseOutput` ([CVE-2021-29554](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29554))
+ * Fixes a division by 0 in `FusedBatchNorm` ([CVE-2021-29555](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29555))
+ * Fixes a division by 0 in `SparseMatMul` ([CVE-2021-29557](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29557))
+ * Fixes a division by 0 in `Reverse` ([CVE-2021-29556](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29556))
+ * Fixes a heap buffer overflow in `SparseSplit` ([CVE-2021-29558](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29558))
+ * Fixes a heap OOB access in unicode ops ([CVE-2021-29559](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29559))
+ * Fixes a heap buffer overflow in `RaggedTensorToTensor` ([CVE-2021-29560](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29560))
+ * Fixes a `CHECK`-fail in `LoadAndRemapMatrix` ([CVE-2021-29561](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29561))
+ * Fixes a `CHECK`-fail in `tf.raw_ops.IRFFT` ([CVE-2021-29562](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29562))
+ * Fixes a `CHECK`-fail in `tf.raw_ops.RFFT` ([CVE-2021-29563](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29563))
+ * Fixes a null pointer dereference in `EditDistance` ([CVE-2021-29564](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29564))
+ * Fixes a null pointer dereference in `SparseFillEmptyRows` ([CVE-2021-29565](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29565))
+ * Fixes a heap OOB access in `Dilation2DBackpropInput` ([CVE-2021-29566](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29566))
+ * Fixes a reference binding to null in `ParameterizedTruncatedNormal` ([CVE-2021-29568](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29568))
+ * Fixes a set of vulnerabilities caused by lack of validation in `SparseDenseCwiseMul` ([CVE-2021-29567](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29567))
+ * Fixes a heap out of bounds read in `MaxPoolGradWithArgmax` ([CVE-2021-29570](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29570))
+ * Fixes a heap out of bounds read in `RequantizationRange` ([CVE-2021-29569](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29569))
+ * Fixes a memory corruption in `DrawBoundingBoxesV2` ([CVE-2021-29571](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29571))
+ * Fixes a reference binding to nullptr in `SdcaOptimizer` ([CVE-2021-29572](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29572))
+ * Fixes an overflow and a denial of service in `tf.raw_ops.ReverseSequence` ([CVE-2021-29575](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29575))
+ * Fixes a division by 0 in `MaxPoolGradWithArgmax` ([CVE-2021-29573](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29573))
+ * Fixes an undefined behavior in `MaxPool3DGradGrad` ([CVE-2021-29574](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29574))
+ * Fixes a heap buffer overflow in `MaxPool3DGradGrad` ([CVE-2021-29576](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29576))
+ * Fixes a heap buffer overflow in `AvgPool3DGrad` ([CVE-2021-29577](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29577))
+ * Fixes an undefined behavior and a `CHECK`-fail in `FractionalMaxPoolGrad` ([CVE-2021-29580](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29580))
+ * Fixes a heap buffer overflow in `FractionalAvgPoolGrad` ([CVE-2021-29578](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29578))
+ * Fixes a heap buffer overflow in `MaxPoolGrad` ([CVE-2021-29579](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29579))
+ * Fixes a segfault in `CTCBeamSearchDecoder` ([CVE-2021-29581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29581))
+ * Fixes a heap OOB read in `tf.raw_ops.Dequantize` ([CVE-2021-29582](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29582))
+ * Fixes a `CHECK`-fail due to integer overflow ([CVE-2021-29584](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29584))
+ * Fixes a heap buffer overflow and undefined behavior in `FusedBatchNorm` ([CVE-2021-29583](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29583))
+ * Fixes a division by zero in padding computation in TFLite ([CVE-2021-29585](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29585))
+ * Fixes a division by zero in optimized pooling implementations in TFLite ([CVE-2021-29586](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29586))
+ * Fixes a division by zero in TFLite's implementation of `SpaceToDepth` ([CVE-2021-29587](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29587))
+ * Fixes a division by zero in TFLite's implementation of `GatherNd` ([CVE-2021-29589](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29589))
+ * Fixes a division by zero in TFLite's implementation of `TransposeConv` ([CVE-2021-29588](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29588))
+ * Fixes a heap OOB read in TFLite's implementation of `Minimum` or `Maximum` ([CVE-2021-29590](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29590))
+ * Fixes a null pointer dereference in TFLite's `Reshape` operator ([CVE-2021-29592](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29592))
+ * Fixes a stack overflow due to looping TFLite subgraph ([CVE-2021-29591](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29591))
+ * Fixes a division by zero in TFLite's implementation of `DepthToSpace` ([CVE-2021-29595](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29595))
+ * Fixes a division by zero in TFLite's convolution code ([CVE-2021-29594](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29594))
+ * Fixes a division by zero in TFLite's implementation of `EmbeddingLookup` ([CVE-2021-29596](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29596))
+ * Fixes a division by zero in TFLite's implementation of `BatchToSpaceNd` ([CVE-2021-29593](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29593))
+ * Fixes a division by zero in TFLite's implementation of `SpaceToBatchNd` ([CVE-2021-29597](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29597))
+ * Fixes a division by zero in TFLite's implementation of `SVDF` ([CVE-2021-29598](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29598))
+ * Fixes a division by zero in TFLite's implementation of `Split` ([CVE-2021-29599](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29599))
+ * Fixes a division by zero in TFLite's implementation of `OneHot` ([CVE-2021-29600](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29600))
+ * Fixes a division by zero in TFLite's implementation of `DepthwiseConv` ([CVE-2021-29602](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29602))
+ * Fixes a division by zero in TFLite's implementation of hashtable lookup ([CVE-2021-29604](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29604))
 + * Fixes an integer overflow in TFLite concatenation ([CVE-2021-29601](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29601))
 + * Fixes an integer overflow in TFLite memory allocation ([CVE-2021-29605](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29605))
+ * Fixes a heap OOB write in TFLite ([CVE-2021-29603](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29603))
+ * Fixes a heap OOB read in TFLite ([CVE-2021-29606](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29606))
+ * Fixes a heap OOB and null pointer dereference in `RaggedTensorToTensor` ([CVE-2021-29608](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29608))
+ * Fixes vulnerabilities caused by incomplete validation in `SparseAdd` ([CVE-2021-29609](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29609))
+ * Fixes vulnerabilities caused by incomplete validation in `SparseSparseMinimum` ([CVE-2021-29607](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29607))
+ * Fixes vulnerabilities caused by incomplete validation in `SparseReshape` ([CVE-2021-29611](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29611))
+ * Fixes vulnerabilities caused by invalid validation in `QuantizeAndDequantizeV2` ([CVE-2021-29610](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29610))
+ * Fixes a heap buffer overflow in `BandedTriangularSolve` ([CVE-2021-29612](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29612))
+ * Fixes vulnerabilities caused by incomplete validation in `tf.raw_ops.CTCLoss` ([CVE-2021-29613](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29613))
+ * Fixes an interpreter crash from vulnerabilities in `tf.io.decode_raw` ([CVE-2021-29614](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29614))
+ * Fixes a stack overflow in `ParseAttrValue` with nested tensors ([CVE-2021-29615](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29615))
+ * Fixes a null dereference in Grappler's `TrySimplify` ([CVE-2021-29616](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29616))
+ * Fixes a crash in `tf.transpose` with complex inputs ([CVE-2021-29618](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29618))
+ * Fixes a crash in `tf.strings.substr` due to `CHECK`-fail ([CVE-2021-29617](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29617))
+ * Fixes a segfault in `tf.raw_ops.SparseCountSparseOutput` ([CVE-2021-29619](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29619))
+ * Fixes a segfault in `tf.raw_ops.ImmutableConst` ([CVE-2021-29539](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-29539))
+ * Updates `curl` to `7.76.0` to handle [CVE-2020-8169](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8169), [CVE-2020-8177](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8177), [CVE-2020-8231](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8231), [CVE-2020-8284](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8284), [CVE-2020-8285](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8285) and [CVE-2020-8286](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-8286).
+
+# Release 2.1.3
+
+## Bug Fixes and Other Changes
+* Fixes an access to uninitialized memory in Eigen code
+ ([CVE-2020-26266](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26266))
+* Fixes a security vulnerability caused by lack of validation in
+ `tf.raw_ops.DataFormatVecPermute` and `tf.raw_ops.DataFormatDimMap`
+ ([CVE-2020-26267](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26267))
+* Fixes a vulnerability caused by attempting to write to an immutable memory region in
+  `tf.raw_ops.ImmutableConst`
+  ([CVE-2020-26268](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26268))
+* Fixes a `CHECK`-fail in LSTM with zero-length input
+ ([CVE-2020-26270](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26270))
+* Fixes a security vulnerability caused by accessing heap data outside of bounds
+ when loading a specially crafted `SavedModel`
+ ([CVE-2020-26271](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26271))
+* Updates `libjpeg-turbo` to `2.0.5` to handle
+ [CVE-2020-13790](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13790).
+* Updates `junit` to `4.13.1` to handle
+ [CVE-2020-15250](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15250).
+* Updates `PCRE` to `8.44` to handle
+ [CVE-2019-20838](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-20838)
+ and
+ [CVE-2020-14155](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-14155).
+* Updates `sqlite3` to `3.44.0` to keep in sync with master branch.
+* Newer ROCm versions are supported on the 2.1 branch.
+
+# Release 2.1.2
+
+## Bug Fixes and Other Changes
+* Fixes an undefined behavior causing a segfault in `tf.raw_ops.Switch`
+ ([CVE-2020-15190](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15190))
+* Fixes three vulnerabilities in conversion to DLPack format
+ ([CVE-2020-15191](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15191),
+ [CVE-2020-15192](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15192),
+ [CVE-2020-15193](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15193))
+* Fixes two vulnerabilities in `SparseFillEmptyRowsGrad`
+ ([CVE-2020-15194](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15194),
+ [CVE-2020-15195](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15195))
+* Fixes an integer truncation vulnerability in code using the work sharder API
+ ([CVE-2020-15202](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15202))
+* Fixes a format string vulnerability in `tf.strings.as_string`
+ ([CVE-2020-15203](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15203))
+* Fixes segfault raised by calling session-only ops in eager mode
+ ([CVE-2020-15204](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15204))
+* Fixes data leak and potential ASLR violation from `tf.raw_ops.StringNGrams`
+ ([CVE-2020-15205](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15205))
+* Fixes segfaults caused by incomplete `SavedModel` validation
+ ([CVE-2020-15206](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15206))
+* Fixes a data corruption due to a bug in negative indexing support in TFLite
+ ([CVE-2020-15207](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15207))
+* Fixes a data corruption due to dimension mismatch in TFLite
+ ([CVE-2020-15208](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15208))
+* Fixes several vulnerabilities in TFLite saved model format
+ ([CVE-2020-15209](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15209),
+ [CVE-2020-15210](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15210),
+ [CVE-2020-15211](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15211))
+* Updates `sqlite3` to `3.33.00` to handle
+ [CVE-2020-9327](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-9327),
+ [CVE-2020-11655](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11655),
+ [CVE-2020-11656](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-11656),
+ [CVE-2020-13434](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13434),
+ [CVE-2020-13435](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13435),
+ [CVE-2020-13630](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13630),
+ [CVE-2020-13631](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13631),
+ [CVE-2020-13871](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-13871),
+ and
+ [CVE-2020-15358](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15358).
+* Removes `scipy` dependency from `setup.py` since TensorFlow does not need it
+ to install the pip package
+* Switches ROCM builds to use ROCM 3.7
+
+# Release 2.1.1
+
+## Bug Fixes and Other Changes
+* Updates `sqlite3` to `3.31.01` to handle [CVE-2019-19880](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-19880), [CVE-2019-19244](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-19244) and [CVE-2019-19645](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-19645)
+* Updates `curl` to `7.69.1` to handle [CVE-2019-15601](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-15601)
+* Updates `libjpeg-turbo` to `2.0.4` to handle [CVE-2018-19664](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-19664), [CVE-2018-20330](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-20330) and [CVE-2019-13960](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-13960)
+* Updates Apache Spark to `2.4.5` to handle [CVE-2019-10099](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2019-10099), [CVE-2018-17190](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-17190) and [CVE-2018-11770](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2018-11770)
+* Fixes a versioning bug which causes Keras layers from TF 1.x to be used instead of those from TF 2.x
+
# Release 2.1.0
 TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support [officially ends on January 1, 2020](https://www.python.org/dev/peps/pep-0373/#update). [As announced earlier](https://groups.google.com/a/tensorflow.org/d/msg/announce/gVwS5RC8mds/dCt1ka2XAAAJ), TensorFlow will also stop supporting Python 2 starting January 1, 2020, and no more releases are expected in 2019.
diff --git a/WORKSPACE b/WORKSPACE
index babb14b509e5c0..bcdd4f46e2f527 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -67,7 +67,7 @@ http_archive(
http_archive(
name = "bazel_skylib",
sha256 = "1dde365491125a3db70731e25658dfdd3bc5dbdfd11b840b3e987ecf043c7ca0",
- urls = ["https://github.com/bazelbuild/bazel-skylib/releases/download/0.9.0/bazel-skylib.0.9.0.tar.gz"],
+ urls = ["https://github.com/bazelbuild/bazel-skylib/releases/download/0.9.0/bazel_skylib-0.9.0.tar.gz"],
) # https://github.com/bazelbuild/bazel-skylib/releases
http_archive(
name = "com_github_apple_swift_swift_protobuf",
diff --git a/configure.py b/configure.py
index 8ec47294b47e88..4d1964f427c7ce 100644
--- a/configure.py
+++ b/configure.py
@@ -1170,7 +1170,8 @@ def system_specific_test_config(env):
write_to_bazelrc('test --test_tag_filters=-gpu,-nomac,-no_mac')
write_to_bazelrc('test --build_tag_filters=-gpu,-nomac,-no_mac')
elif is_linux():
- if env.get('TF_NEED_CUDA', None) == '1':
+ if ((env.get('TF_NEED_CUDA', None) == '1') or
+ (env.get('TF_NEED_ROCM', None) == '1')):
write_to_bazelrc('test --test_tag_filters=-no_gpu')
write_to_bazelrc('test --build_tag_filters=-no_gpu')
write_to_bazelrc('test --test_env=LD_LIBRARY_PATH')
@@ -1414,6 +1415,10 @@ def main():
write_action_env_to_bazelrc('LD_LIBRARY_PATH',
environ_cp.get('LD_LIBRARY_PATH'))
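+ # If the user supplied a ROCm install location, expose it to Bazel's action
+ # environment under both ROCM_PATH and ROCM_ROOT.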
+ if (environ_cp.get('TF_NEED_ROCM') == '1' and environ_cp.get('ROCM_PATH')):
+ write_action_env_to_bazelrc('ROCM_PATH', environ_cp.get('ROCM_PATH'))
+ write_action_env_to_bazelrc('ROCM_ROOT', environ_cp.get('ROCM_PATH'))
+
environ_cp['TF_NEED_CUDA'] = str(
int(get_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)))
if (environ_cp.get('TF_NEED_CUDA') == '1' and
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 2ccb9854622282..7e87e58d37e1bf 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -875,7 +875,7 @@ gen_api_init_files(
output_files = TENSORFLOW_API_INIT_FILES_V1,
output_package = "tensorflow._api.v1",
root_file_name = "v1.py",
- root_init_template = "api_template_v1.__init__.py",
+ root_init_template = "$(location api_template_v1.__init__.py)",
)
gen_api_init_files(
@@ -898,7 +898,7 @@ gen_api_init_files(
output_files = TENSORFLOW_API_INIT_FILES_V2,
output_package = "tensorflow._api.v2",
root_file_name = "v2.py",
- root_init_template = "api_template.__init__.py",
+ root_init_template = "$(location api_template.__init__.py)",
)
py_library(
diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py
index c515cc76b9aacd..2a53f973f32f49 100644
--- a/tensorflow/api_template.__init__.py
+++ b/tensorflow/api_template.__init__.py
@@ -89,6 +89,7 @@
# Enable TF2 behaviors
from tensorflow.python.compat import v2_compat as _compat # pylint: disable=g-import-not-at-top
_compat.enable_v2_behavior()
+_major_api_version = 2
# Load all plugin libraries from site-packages/tensorflow-plugins if we are
diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py
index 2b2899c3fe031e..b6dba2d35dae9a 100644
--- a/tensorflow/api_template_v1.__init__.py
+++ b/tensorflow/api_template_v1.__init__.py
@@ -104,6 +104,8 @@
_current_module.app.flags = flags # pylint: disable=undefined-variable
setattr(_current_module, "flags", flags)
+_major_api_version = 1
+
# Load all plugin libraries from site-packages/tensorflow-plugins if we are
# running under pip.
# TODO(gunan): Enable setting an environment variable to define arbitrary plugin
diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc
index f5a09e09dcda3e..942ec08f451a2d 100644
--- a/tensorflow/cc/gradients/nn_grad_test.cc
+++ b/tensorflow/cc/gradients/nn_grad_test.cc
@@ -259,6 +259,9 @@ TEST_F(NNGradTest, MaxPoolGradV2Helper) {
RunTest(x, x_init_value, y, y_shape);
}
+// TODO(rocm):
+// Re-enable this test once 3D pooling is supported on ROCm platform
+#ifndef TENSORFLOW_USE_ROCM
TEST_F(NNGradTest, MaxPool3DGradHelper) {
TensorShape x_shape({1, 3, 3, 3, 1});
TensorShape y_shape({1, 1, 1, 1, 1});
@@ -271,6 +274,7 @@ TEST_F(NNGradTest, MaxPool3DGradHelper) {
SetRandomValuesForMaxPooling(&x_init_value);
RunTest(x, x_init_value, y, y_shape);
}
+#endif
TEST_F(NNGradTest, AvgPoolGradHelper) {
TensorShape x_shape({1, 2, 2, 1});
@@ -283,6 +287,9 @@ TEST_F(NNGradTest, AvgPoolGradHelper) {
RunTest(x, x_shape, y, y_shape);
}
+// TODO(rocm):
+// Re-enable this test once 3D pooling is supported on ROCm platform
+#ifndef TENSORFLOW_USE_ROCM
TEST_F(NNGradTest, AvgPool3DGradHelper) {
TensorShape x_shape({1, 3, 3, 3, 1});
TensorShape y_shape({1, 1, 1, 1, 1});
@@ -293,6 +300,7 @@ TEST_F(NNGradTest, AvgPool3DGradHelper) {
auto y = AvgPool3D(scope_, x, ksize, strides, "SAME");
RunTest(x, x_shape, y, y_shape);
}
+#endif
TEST_F(NNGradTest, LRN) {
TensorShape x_shape({1, 1, 2, 1});
diff --git a/tensorflow/cc/saved_model/loader.cc b/tensorflow/cc/saved_model/loader.cc
index 7815dbd3a4c65c..fa62816637f8c9 100644
--- a/tensorflow/cc/saved_model/loader.cc
+++ b/tensorflow/cc/saved_model/loader.cc
@@ -19,6 +19,11 @@ limitations under the License.
#include "tensorflow/cc/saved_model/constants.h"
#include "tensorflow/cc/saved_model/reader.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/function.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/tensor.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/monitoring/counter.h"
#include "tensorflow/core/lib/monitoring/sampler.h"
@@ -65,12 +70,54 @@ uint64 GetLatencyMicroseconds(const uint64 start_microseconds) {
return end_microseconds - start_microseconds;
}
+// Ensure that constant tensors loaded from the saved model have valid shape.
+// Also ensure that constant nodes have a value assigned to them.
+// TODO(b/154763635): this is temporary and will be replaced with a better audit
+static Status ValidateNode(const NodeDef& node) {
+ const auto node_iterator = node.attr().find("value");
+ if (node_iterator != node.attr().end()) {
+ AttrValue node_value = node_iterator->second;
+ if (node_value.has_tensor()) {
+ const PartialTensorShape node_shape(node_value.tensor().tensor_shape());
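+ // num_elements() is negative when the shape is not fully defined; a constant
+ // initializer in a SavedModel must always carry a concrete shape, so reject
+ // anything else.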
+ if (node_shape.num_elements() < 0) {
+ return errors::FailedPrecondition(
+ "Saved model contains node \"", node.name(), "\" (op \"", node.op(),
+ "\") which initializes from a tensor with ",
+ node_shape.num_elements(), " elements");
+ }
+ }
+ } else if (node.op() == "Const") {
+ return errors::FailedPrecondition(
+ "Saved model contains node \"", node.name(),
+ "\" which is a constant tensor but no value has been provided");
+ }
+ return Status::OK();
+}
+
+static Status ValidateSavedTensors(const GraphDef& graph_def) {
+ for (const auto& node : graph_def.node()) {
+ TF_RETURN_IF_ERROR(ValidateNode(node));
+ }
+
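+ // Constant nodes can also appear inside the functions of the graph's
+ // function library, so validate those NodeDefs as well.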
+ if (graph_def.has_library()) {
+ const FunctionDefLibrary& library = graph_def.library();
+ for (const auto& function : library.function()) {
+ for (const auto& node : function.node_def()) {
+ TF_RETURN_IF_ERROR(ValidateNode(node));
+ }
+ }
+ }
+
+ return Status::OK();
+}
+
Status LoadMetaGraphIntoSession(const MetaGraphDef& meta_graph_def,
const SessionOptions& session_options,
 std::unique_ptr<Session>* session) {
Session* session_p = nullptr;
TF_RETURN_IF_ERROR(NewSession(session_options, &session_p));
session->reset(session_p);
+ TF_RETURN_IF_ERROR(ValidateSavedTensors(meta_graph_def.graph_def()));
return (*session)->Create(meta_graph_def.graph_def());
}
diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
index 84616f3a37b2d8..5f617babcb777f 100644
--- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
+++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
@@ -542,10 +542,9 @@ static std::vector<string> GetROCDLPaths(int amdgpu_version,
const string& rocdl_dir_path) {
// AMDGPU version-neutral bitcodes.
 static std::vector<string>* rocdl_filenames = new std::vector<string>(
- {"hc.amdgcn.bc", "opencl.amdgcn.bc", "ocml.amdgcn.bc", "ockl.amdgcn.bc",
- "oclc_finite_only_off.amdgcn.bc", "oclc_daz_opt_off.amdgcn.bc",
- "oclc_correctly_rounded_sqrt_on.amdgcn.bc",
- "oclc_unsafe_math_off.amdgcn.bc"});
+ {"hc.bc", "opencl.bc", "ocml.bc", "ockl.bc", "oclc_finite_only_off.bc",
+ "oclc_daz_opt_off.bc", "oclc_correctly_rounded_sqrt_on.bc",
+ "oclc_unsafe_math_off.bc"});
// Construct full path to ROCDL bitcode libraries.
 std::vector<string> result;
@@ -556,7 +555,7 @@ static std::vector<string> GetROCDLPaths(int amdgpu_version,
// Add AMDGPU version-specific bitcodes.
result.push_back(tensorflow::io::JoinPath(
rocdl_dir_path,
- absl::StrCat("oclc_isa_version_", amdgpu_version, ".amdgcn.bc")));
+ absl::StrCat("oclc_isa_version_", amdgpu_version, ".bc")));
return result;
}
@@ -620,8 +619,10 @@ StatusOr<std::vector<uint8>> EmitModuleToHsaco(
// Locate lld.
// TODO(whchung@gmail.com): change to tensorflow::ROCmRoot() after
// ROCm-Device-Libs PR.
- std::string lld_path = tensorflow::io::JoinPath("/opt/rocm", "hcc/bin");
- auto lld_program = llvm::sys::findProgramByName("ld.lld", {lld_path});
+ std::string lld_path_1 = tensorflow::io::JoinPath("/opt/rocm", "hcc/bin");
+ std::string lld_path_2 = tensorflow::io::JoinPath("/opt/rocm", "llvm/bin");
+ auto lld_program =
+ llvm::sys::findProgramByName("ld.lld", {lld_path_1, lld_path_2});
if (!lld_program) {
return xla::InternalError("unable to find ld.lld in PATH: %s",
lld_program.getError().message());
@@ -685,7 +686,7 @@ std::unique_ptr<llvm::TargetMachine> AMDGPUGetTargetMachine(
llvm::Triple target_triple, int amdgpu_version,
const HloModuleConfig& hlo_module_config) {
return GetTargetMachine(target_triple, absl::StrCat("gfx", amdgpu_version),
- hlo_module_config, "-code-object-v3");
+ hlo_module_config, "+code-object-v3");
}
void AMDGPUBackendInit(const HloModuleConfig& hlo_module_config) {
diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc
index 3afc9ad9a62bf8..e7760b74d841ff 100644
--- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc
+++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc
@@ -323,7 +323,12 @@ Status KernelAndDeviceOp::Run(
if (outputs != nullptr) {
outputs->clear();
for (int i = 0; i < context.num_outputs(); ++i) {
- outputs->push_back(Tensor(*context.mutable_output(i)));
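+ // context.mutable_output(i) may legitimately return nullptr (for example
+ // when a graph/session-only op is run eagerly and never produces the
+ // output), so fall back to an empty Tensor instead of dereferencing it.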
+ const auto* output_tensor = context.mutable_output(i);
+ if (output_tensor != nullptr) {
+ outputs->push_back(Tensor(*output_tensor));
+ } else {
+ outputs->push_back(Tensor());
+ }
}
}
return Status::OK();
diff --git a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed_test.cc b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed_test.cc
index aba84864f087eb..bf83e7962f1343 100644
--- a/tensorflow/core/distributed_runtime/collective_param_resolver_distributed_test.cc
+++ b/tensorflow/core/distributed_runtime/collective_param_resolver_distributed_test.cc
@@ -315,7 +315,7 @@ TEST_F(DeviceResDistTest, Workers2Devices2) {
ValidateCollectiveParams(num_workers, num_devices);
}
-#ifndef GOOGLE_CUDA
+#if !GOOGLE_CUDA && !TENSORFLOW_USE_ROCM
namespace {
// A mock NcclReducer for testing group runtime details initialization with CPU
// builds. The only meaningful function in this class is
diff --git a/tensorflow/core/framework/attr_value_util.cc b/tensorflow/core/framework/attr_value_util.cc
index f911b5b7b6f6d5..1a332d1517ddc0 100644
--- a/tensorflow/core/framework/attr_value_util.cc
+++ b/tensorflow/core/framework/attr_value_util.cc
@@ -38,6 +38,9 @@ namespace {
// Do not construct large tensors to compute their hash or compare for equality.
constexpr int kMaxAttrValueTensorByteSize = 32 * 1024 * 1024; // 32mb
+// Limit nesting of tensors to 100 deep to prevent memory overflow.
+constexpr int kMaxTensorNestDepth = 100;
+
// Return the size of the tensor represented by this TensorProto. If shape is
// not fully defined return -1.
int64 TensorByteSize(const TensorProto& t) {
@@ -224,6 +227,54 @@ string SummarizeFunc(const NameAttrList& func) {
return strings::StrCat(func.name(), "[", absl::StrJoin(entries, ", "), "]");
}
+bool ParseAttrValueHelper_TensorNestsUnderLimit(int limit, string to_parse) {
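+ // Single pass over the textproto: '{' and '<' open a nesting level, '}' and
+ // '>' close one, always advancing from the leftmost remaining marker.
+ // Returns true once no opening markers remain, and false as soon as the
+ // nesting depth reaches the limit.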
+ int nests = 0;
+ int maxed_out = to_parse.length();
+ int open_curly = to_parse.find('{');
+ int open_bracket = to_parse.find('<');
+ int close_curly = to_parse.find('}');
+ int close_bracket = to_parse.find('>');
+ if (open_curly == -1) {
+ open_curly = maxed_out;
+ }
+ if (open_bracket == -1) {
+ open_bracket = maxed_out;
+ }
+ int min = std::min(open_curly, open_bracket);
+ do {
+ if (open_curly == maxed_out && open_bracket == maxed_out) {
+ return true;
+ }
+ if (min == open_curly) {
+ nests += 1;
+ open_curly = to_parse.find('{', open_curly + 1);
+ if (open_curly == -1) {
+ open_curly = maxed_out;
+ }
+ } else if (min == open_bracket) {
+ nests += 1;
+ open_bracket = to_parse.find('<', open_bracket + 1);
+ if (open_bracket == -1) {
+ open_bracket = maxed_out;
+ }
+ } else if (min == close_curly) {
+ nests -= 1;
+ close_curly = to_parse.find('}', close_curly + 1);
+ if (close_curly == -1) {
+ close_curly = maxed_out;
+ }
+ } else if (min == close_bracket) {
+ nests -= 1;
+ close_bracket = to_parse.find('>', close_bracket + 1);
+ if (close_bracket == -1) {
+ close_bracket = maxed_out;
+ }
+ }
+ min = std::min({open_curly, open_bracket, close_curly, close_bracket});
+ } while (nests < 100);
+ return false;
+}
+
} // namespace
string SummarizeAttrValue(const AttrValue& attr_value) {
@@ -448,7 +499,12 @@ bool ParseAttrValue(StringPiece type, StringPiece text, AttrValue* out) {
} else {
to_parse = strings::StrCat(field_name, ": ", text);
}
-
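+ // Reject tensor attrs whose textual form nests deeper than
+ // kMaxTensorNestDepth before handing them to the proto parser, since parsing
+ // such deeply nested protos can overflow the stack.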
+ if (field_name == "tensor") {
+ if (!ParseAttrValueHelper_TensorNestsUnderLimit(kMaxTensorNestDepth,
+ to_parse)) {
+ return false;
+ }
+ }
return ProtoParseFromString(to_parse, out);
}
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index 5a805520f519de..3f7a63e4d77390 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -1436,6 +1436,17 @@ void GraphConstructor::Undo() {
Status GraphConstructor::MakeEdge(Node* src, int output_index, Node* dst,
int input_index) {
+ if (output_index >= src->num_outputs()) {
+ return errors::InvalidArgument(
+ "Output ", output_index, " of node ", src->name(),
+ " does not exist. Node only has ", src->num_outputs(), " outputs.");
+ }
+ if (input_index >= dst->num_inputs()) {
+ return errors::InvalidArgument(
+ "Input ", input_index, " of node ", dst->name(),
+ " does not exist. Node only has ", dst->num_inputs(), " inputs.");
+ }
+
DataType src_out = src->output_type(output_index);
DataType dst_in = dst->input_type(input_index);
if (!TypesCompatible(dst_in, src_out)) {
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index d2ff480c29d3c6..fec54119117ec9 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -1970,6 +1970,12 @@ class ReorderCastLikeAndValuePreserving : public ArithmeticOptimizerStage {
Status TrySimplify(NodeDef* consumer, string* simplified_node_name) override {
NodeDef* producer;
+
+ if (consumer->input_size() < 1) {
+ return errors::FailedPrecondition("Node ", simplified_node_name,
+ " lacks inputs");
+ }
+
TF_RETURN_IF_ERROR(GetInputNode(consumer->input(0), &producer));
const bool producer_is_cast = IsCastLike(*producer);
const bool can_optimize =
@@ -2369,6 +2375,11 @@ class ReplaceMulWithSquare : public ArithmeticOptimizerStage {
~ReplaceMulWithSquare() override = default;
bool IsSupported(const NodeDef* node) const override {
+ if (!node || node->input_size() < 2) {
+ // Invalid node
+ return false;
+ }
+
return IsAnyMul(*node) && node->input(0) == node->input(1);
}
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
index 8bf543f914ddad..616b91308e25a6 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc
@@ -548,12 +548,14 @@ TEST_F(ConstantFoldingTest, ConstantPushDownBiasAdd) {
}
}
+// This test fails on ROCm platform (see commit message for details)
+#ifndef TENSORFLOW_USE_ROCM
TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_ScalarConst) {
for (string data_format : {
"NHWC",
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
"NCHW"
-#endif // GOOGLE_CUDA
+#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
}) {
MulConvPushDownTest(
/*input_shape=*/data_format == "NHWC" ? TensorShape{4, 10, 10, 3}
@@ -565,13 +567,16 @@ TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_ScalarConst) {
/*expect_folded=*/true);
}
}
+#endif
+// This test fails on ROCm platform (see commit message for details)
+#ifndef TENSORFLOW_USE_ROCM
TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_SingletonConst) {
for (string data_format : {
"NHWC",
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
"NCHW"
-#endif // GOOGLE_CUDA
+#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
}) {
for (auto mul_const_input_shape :
{TensorShape{1}, TensorShape{1, 1, 1, 1}}) {
@@ -585,14 +590,15 @@ TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_SingletonConst) {
}
}
}
+#endif
TEST_F(ConstantFoldingTest,
MulConvPushDownTest_Conv2D_SingletonConst_ShapeMismatch) {
for (string data_format : {
"NHWC",
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
"NCHW"
-#endif // GOOGLE_CUDA
+#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
}) {
MulConvPushDownTest(
/*input_shape=*/data_format == "NHWC" ? TensorShape{4, 10, 10, 3}
@@ -608,9 +614,9 @@ TEST_F(ConstantFoldingTest,
TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_3x1x3Const) {
for (auto data_format : {
"NHWC",
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
"NCHW"
-#endif // GOOGLE_CUDA
+#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
}) {
MulConvPushDownTest(
/*input_shape=*/{3, 3, 3, 3},
@@ -635,7 +641,7 @@ TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_NHWC_VectorLikeConst) {
}
}
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_NCHW_VectorLikeConst) {
for (auto mul_const_input_shape :
{TensorShape{3}, TensorShape{3, 1, 1}, TensorShape{1, 3, 1, 1}}) {
@@ -649,14 +655,14 @@ TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_NCHW_VectorLikeConst) {
/*expect_folded=*/false);
}
}
-#endif // GOOGLE_CUDA
+#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_3x1Const) {
for (auto data_format : {
"NHWC",
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
"NCHW"
-#endif // GOOGLE_CUDA
+#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
}) {
MulConvPushDownTest(
/*input_shape=*/{3, 3, 3, 3},
@@ -668,6 +674,8 @@ TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv2D_3x1Const) {
}
}
+// This test fails on ROCm platform (see commit message for details)
+#ifndef TENSORFLOW_USE_ROCM
TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv3D_NDHWC_1x1x3Const) {
MulConvPushDownTest(
/*input_shape=*/{3, 3, 3, 3, 3},
@@ -678,6 +686,7 @@ TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv3D_NDHWC_1x1x3Const) {
/*data_format=*/"NDHWC",
/*expect_folded=*/true);
}
+#endif
TEST_F(ConstantFoldingTest, MulConvPushDownTest_Conv3D_NCDHW_3x1x1x1Const) {
MulConvPushDownTest(
diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
index 0734c32948b159..1ad38d6a16fb31 100644
--- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc
@@ -67,6 +67,12 @@ bool DependencyOptimizer::SafeToRemoveIdentity(const NodeDef& node) const {
// The output values of this node may be needed.
return false;
}
+
+ if (node.input_size() < 1) {
+ // Node lacks input, is invalid
+ return false;
+ }
+
const NodeDef* input = node_map_->GetNode(NodeName(node.input(0)));
CHECK(input != nullptr) << "node = " << node.name()
<< " input = " << node.input(0);
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index ac9c6299833e44..ca79757899b98c 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -5713,6 +5713,24 @@ tf_kernel_library(
deps = STRING_DEPS,
)
+tf_cc_test(
+ name = "as_string_op_test",
+ size = "small",
+ srcs = ["as_string_op_test.cc"],
+ deps = [
+ ":as_string_op",
+ ":ops_testutil",
+ ":ops_util",
+ "//tensorflow/core:core_cpu",
+ "//tensorflow/core:framework",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ "//tensorflow/core:testlib",
+ ],
+)
+
tf_kernel_library(
name = "unicode_ops",
prefix = "unicode_ops",
diff --git a/tensorflow/core/kernels/as_string_op.cc b/tensorflow/core/kernels/as_string_op.cc
index 8341909fbc8409..b9af976a654d99 100644
--- a/tensorflow/core/kernels/as_string_op.cc
+++ b/tensorflow/core/kernels/as_string_op.cc
@@ -65,9 +65,26 @@ class AsStringOp : public OpKernel {
OP_REQUIRES(ctx, !(scientific && shortest),
errors::InvalidArgument(
"Cannot select both scientific and shortest notation"));
+
format_ = "%";
+ if (!fill_string.empty()) {
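+ // The fill character is spliced into a printf-style format string, so only
+ // the standard flag characters are accepted; anything else is rejected
+ // rather than being appended to the format.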
+ switch (fill_string[0]) {
+ case ' ':
+ case '+':
+ case '-':
+ case '0':
+ case '#':
+ strings::Appendf(&format_, "%s", fill_string.c_str());
+ break;
+ default:
+ bool fill_not_supported = true;
+ OP_REQUIRES(ctx, !fill_not_supported,
+ errors::InvalidArgument("Fill argument not supported: \"",
+ fill_string, "\""));
+ }
+ }
if (width > -1) {
- strings::Appendf(&format_, "%s%d", fill_string.c_str(), width);
+ strings::Appendf(&format_, "%d", width);
}
if (precision > -1) {
strings::Appendf(&format_, ".%d", precision);
diff --git a/tensorflow/core/kernels/as_string_op_test.cc b/tensorflow/core/kernels/as_string_op_test.cc
new file mode 100644
index 00000000000000..dff78e25e72025
--- /dev/null
+++ b/tensorflow/core/kernels/as_string_op_test.cc
@@ -0,0 +1,245 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace tensorflow {
+namespace {
+
+class AsStringGraphTest : public OpsTestBase {
+ protected:
+ Status Init(DataType input_type, const string& fill = "", int width = -1,
+ int precision = -1, bool scientific = false,
+ bool shortest = false) {
+ TF_CHECK_OK(NodeDefBuilder("op", "AsString")
+ .Input(FakeInput(input_type))
+ .Attr("fill", fill)
+ .Attr("precision", precision)
+ .Attr("scientific", scientific)
+ .Attr("shortest", shortest)
+ .Attr("width", width)
+ .Finalize(node_def()));
+ return InitOp();
+ }
+};
+
+TEST_F(AsStringGraphTest, Int8) {
+ TF_ASSERT_OK(Init(DT_INT8));
+
+ AddInputFromArray<int8>(TensorShape({3}), {-42, 0, 42});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({3}));
+ test::FillValues<tstring>(&expected, {"-42", "0", "42"});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, Int64) {
+ TF_ASSERT_OK(Init(DT_INT64));
+
+ AddInputFromArray<int64>(TensorShape({3}), {-42, 0, 42});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({3}));
+ test::FillValues<tstring>(&expected, {"-42", "0", "42"});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FloatDefault) {
+ TF_ASSERT_OK(Init(DT_FLOAT));
+
+ AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({4}));
+ test::FillValues<tstring>(
+ &expected, {"-42.000000", "0.000000", "3.141590", "42.000000"});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FloatScientific) {
+ TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"", /*width=*/-1, /*precision=*/-1,
+ /*scientific=*/true));
+
+ AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({4}));
+ test::FillValues<tstring>(&expected, {"-4.200000e+01", "0.000000e+00",
+ "3.141590e+00", "4.200000e+01"});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FloatShortest) {
+ TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"", /*width=*/-1, /*precision=*/-1,
+ /*scientific=*/false, /*shortest=*/true));
+
+ AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({4}));
+ test::FillValues<tstring>(&expected, {"-42", "0", "3.14159", "42"});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FloatPrecisionOnly) {
+ TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"", /*width=*/-1, /*precision=*/2));
+
+ AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({4}));
+ test::FillValues<tstring>(&expected, {"-42.00", "0.00", "3.14", "42.00"});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FloatWidthOnly) {
+ TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"", /*width=*/5));
+
+ AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({4}));
+ test::FillValues<tstring>(
+ &expected, {"-42.000000", "0.000000", "3.141590", "42.000000"});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, Float_5_2_Format) {
+ TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"", /*width=*/5, /*precision=*/2));
+
+ AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({4}));
+ test::FillValues<tstring>(&expected, {"-42.00", " 0.00", " 3.14", "42.00"});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, Complex) {
+ TF_ASSERT_OK(Init(DT_COMPLEX64, /*fill=*/"", /*width=*/5, /*precision=*/2));
+
+ AddInputFromArray<complex64>(TensorShape({3}), {{-4, 2}, {0}, {3.14159, -1}});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({3}));
+ test::FillValues<tstring>(
+ &expected, {"(-4.00, 2.00)", "( 0.00, 0.00)", "( 3.14,-1.00)"});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, Bool) {
+ TF_ASSERT_OK(Init(DT_BOOL));
+
+ AddInputFromArray<bool>(TensorShape({2}), {true, false});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({2}));
+ test::FillValues<tstring>(&expected, {"true", "false"});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, String) {
+ Status s = Init(DT_STRING);
+ ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+ ASSERT_TRUE(absl::StrContains(
+ s.error_message(),
+ "Value for attr 'T' of string is not in the list of allowed values"));
+}
+
+TEST_F(AsStringGraphTest, OnlyOneOfScientificAndShortest) {
+ Status s = Init(DT_FLOAT, /*fill=*/"", /*width=*/-1, /*precision=*/-1,
+ /*scientific=*/true, /*shortest=*/true);
+ ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+ ASSERT_TRUE(
+ absl::StrContains(s.error_message(),
+ "Cannot select both scientific and shortest notation"));
+}
+
+TEST_F(AsStringGraphTest, NoShortestForNonFloat) {
+ Status s = Init(DT_INT32, /*fill=*/"", /*width=*/-1, /*precision=*/-1,
+ /*scientific=*/false, /*shortest=*/true);
+ ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+ ASSERT_TRUE(absl::StrContains(
+ s.error_message(),
+ "scientific and shortest format not supported for datatype"));
+}
+
+TEST_F(AsStringGraphTest, NoScientificForNonFloat) {
+ Status s = Init(DT_INT32, /*fill=*/"", /*width=*/-1, /*precision=*/-1,
+ /*scientific=*/true);
+ ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+ ASSERT_TRUE(absl::StrContains(
+ s.error_message(),
+ "scientific and shortest format not supported for datatype"));
+}
+
+TEST_F(AsStringGraphTest, NoPrecisionForNonFloat) {
+ Status s = Init(DT_INT32, /*fill=*/"", /*width=*/-1, /*precision=*/5);
+ ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+ ASSERT_TRUE(absl::StrContains(s.error_message(),
+ "precision not supported for datatype"));
+}
+
+TEST_F(AsStringGraphTest, LongFill) {
+ Status s = Init(DT_INT32, /*fill=*/"asdf");
+ ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+ ASSERT_TRUE(absl::StrContains(s.error_message(),
+ "Fill string must be one or fewer characters"));
+}
+
+TEST_F(AsStringGraphTest, FillWithZero) {
+ TF_ASSERT_OK(Init(DT_INT64, /*fill=*/"0", /*width=*/4));
+
+ AddInputFromArray<int64>(TensorShape({3}), {-42, 0, 42});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({3}));
+ test::FillValues<tstring>(&expected, {"-042", "0000", "0042"});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FillWithSpace) {
+ TF_ASSERT_OK(Init(DT_INT64, /*fill=*/" ", /*width=*/4));
+
+ AddInputFromArray<int64>(TensorShape({3}), {-42, 0, 42});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({3}));
+ test::FillValues<tstring>(&expected, {" -42", "   0", "  42"});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FillWithChar1) {
+ TF_ASSERT_OK(Init(DT_INT64, /*fill=*/"-", /*width=*/4));
+
+ AddInputFromArray<int64>(TensorShape({3}), {-42, 0, 42});
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor expected(allocator(), DT_STRING, TensorShape({3}));
+ test::FillValues<tstring>(&expected, {"-42 ", "0   ", "42  "});
+ test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FillWithChar3) {
+ Status s = Init(DT_INT32, /*fill=*/"s");
+ ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+ ASSERT_TRUE(
+ absl::StrContains(s.error_message(), "Fill argument not supported"));
+}
+
+TEST_F(AsStringGraphTest, FillWithChar4) {
+ Status s = Init(DT_INT32, /*fill=*/"n");
+ ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+ ASSERT_TRUE(
+ absl::StrContains(s.error_message(), "Fill argument not supported"));
+}
+
+} // end namespace
+} // end namespace tensorflow
diff --git a/tensorflow/core/kernels/boosted_trees/prediction_ops.cc b/tensorflow/core/kernels/boosted_trees/prediction_ops.cc
index 19be606f184939..e3a908d1b6b20d 100644
--- a/tensorflow/core/kernels/boosted_trees/prediction_ops.cc
+++ b/tensorflow/core/kernels/boosted_trees/prediction_ops.cc
@@ -121,7 +121,7 @@ class BoostedTreesTrainingPredictOp : public OpKernel {
auto do_work = [&resource, &bucketized_features, &cached_tree_ids,
&cached_node_ids, &output_partial_logits,
&output_node_ids, latest_tree,
- this](int32 start, int32 end) {
+ this](int64 start, int64 end) {
for (int32 i = start; i < end; ++i) {
int32 tree_id = cached_tree_ids(i);
int32 node_id = cached_node_ids(i);
@@ -237,7 +237,7 @@ class BoostedTreesPredictOp : public OpKernel {
const int32 last_tree = resource->num_trees() - 1;
auto do_work = [&resource, &bucketized_features, &output_logits, last_tree,
- this](int32 start, int32 end) {
+ this](int64 start, int64 end) {
for (int32 i = start; i < end; ++i) {
 std::vector<float> tree_logits(logits_dimension_, 0.0);
int32 tree_id = 0;
@@ -340,7 +340,7 @@ class BoostedTreesExampleDebugOutputsOp : public OpKernel {
// path. Note: feature_ids has one less value than logits_path because the
// first value of each logit path will be the bias.
auto do_work = [&resource, &bucketized_features, &output_debug_info,
- last_tree](int32 start, int32 end) {
+ last_tree](int64 start, int64 end) {
for (int32 i = start; i < end; ++i) {
// Proto to store debug outputs, per example.
boosted_trees::DebugOutput example_debug_info;
diff --git a/tensorflow/core/kernels/conv_2d_gpu.h b/tensorflow/core/kernels/conv_2d_gpu.h
index 22d7f939686a1b..31abe9dfead8ec 100644
--- a/tensorflow/core/kernels/conv_2d_gpu.h
+++ b/tensorflow/core/kernels/conv_2d_gpu.h
@@ -236,7 +236,7 @@ __global__ void SwapDimension1And2InTensor3UsingTiles(
// One extra line in the inner dimension to avoid share memory bank conflict.
// This is to mimic the following, but no constructor of T can be invoked.
// __shared__ T shared_memory_tile[TileSizeI][TileSizeJ + 1];
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_COMPILER_IS_HIP_CLANG
__shared__ __align__(
alignof(T)) char shared_mem_raw[TileSizeI * (TileSizeJ + 1) * sizeof(T)];
typedef T(*SharedMemoryTile)[TileSizeJ + 1];
diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc
index 594dbd0d0df74a..e2681cf8ac9154 100644
--- a/tensorflow/core/kernels/conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc
@@ -496,6 +496,14 @@ class Conv2DCustomBackpropFilterOp : public OpKernel {
const int filter_total_size = dims.spatial_dims[0].filter_size *
dims.spatial_dims[1].filter_size *
dims.in_depth;
+ OP_REQUIRES(
+ context,
+ filter_total_size * dims.out_depth == filter_backprop->NumElements(),
+ errors::InvalidArgument(
+ "filter_size does not have enough elements, requested ",
+ filter_total_size * dims.out_depth, ", got ",
+ filter_backprop->NumElements()));
+
// The output image size is the spatial size of the output.
const int output_image_size =
dims.spatial_dims[0].output_size * dims.spatial_dims[1].output_size;
@@ -519,6 +527,11 @@ class Conv2DCustomBackpropFilterOp : public OpKernel {
const size_t work_unit_size = size_A + size_B + size_C;
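+ // work_unit_size is used as a divisor when computing shard_size below,
+ // so it must not be zero.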
+ OP_REQUIRES(
+ context, work_unit_size != 0,
+ errors::InvalidArgument(
+ "Work size for convolution would be 0, which is not acceptable"));
+
const size_t shard_size =
(target_working_set_size + work_unit_size - 1) / work_unit_size;
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc
index 2f6200e5045b68..f4d1fa49e65436 100644
--- a/tensorflow/core/kernels/conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_input_ops.cc
@@ -673,6 +673,11 @@ class Conv2DCustomBackpropInputOp : public OpKernel {
dims.batch_size == 1 ||
thread_work_unit_size >= min_thread_work_unit_size;
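+ // Reject empty input, filter_sizes, or out_backprop tensors so that
+ // work_unit_size is never zero where it is later used as a divisor.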
+ OP_REQUIRES(
+ context, work_unit_size > 0,
+ errors::InvalidArgument("input, filter_sizes and out_backprop tensors "
+ "must all have at least 1 element"));
+
const size_t shard_size =
use_parallel_contraction
? 1
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index 96bc41a7262e59..cc6e3ffd90075c 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -236,6 +236,28 @@ class Conv3DBackpropInputOp : public OpKernel {
input_shape = context->input(0).shape();
}
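+ // Validate the ranks and channel dimensions of input, filter_sizes, and
+ // out_backprop before computing the backprop dimensions.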
+ OP_REQUIRES(context, input_shape.dims() == 5,
+ errors::InvalidArgument("input tensor must have 5 dimensions"));
+ OP_REQUIRES(
+ context, filter_shape.dims() == 5,
+ errors::InvalidArgument("filter_sizes tensor must have 5 dimensions"));
+ OP_REQUIRES(
+ context, out_backprop_shape.dims() == 5,
+ errors::InvalidArgument("out_backprop tensor must have 5 dimensions"));
+ OP_REQUIRES(
+ context, input_shape.dim_size(4) == filter_shape.dim_size(3),
+ errors::InvalidArgument("input and filter_sizes must have the same "
+ "number of channels. Got ",
+ input_shape.dim_size(4), " for input and ",
+ filter_shape.dim_size(3), " for filter_sizes"));
+ OP_REQUIRES(
+ context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4),
+ errors::InvalidArgument("out_backprop and filter_sizes must have the "
+ "same number of channels. Got ",
+ out_backprop_shape.dim_size(4),
+ " for out_backprop and ",
+ filter_shape.dim_size(4), " for filter_sizes"));
+
ConvBackpropDimensions dims;
OP_REQUIRES_OK(context, ConvBackpropComputeDimensions(
"Conv3DBackpropInputOp", /*num_spatial_dims=*/3,
@@ -342,6 +364,28 @@ class Conv3DCustomBackpropInputOp : public OpKernel {
input_shape = context->input(0).shape();
}
+ OP_REQUIRES(context, input_shape.dims() == 5,
+ errors::InvalidArgument("input tensor must have 5 dimensions"));
+ OP_REQUIRES(
+ context, filter_shape.dims() == 5,
+ errors::InvalidArgument("filter_sizes tensor must have 5 dimensions"));
+ OP_REQUIRES(
+ context, out_backprop_shape.dims() == 5,
+ errors::InvalidArgument("out_backprop tensor must have 5 dimensions"));
+ OP_REQUIRES(
+ context, input_shape.dim_size(4) == filter_shape.dim_size(3),
+ errors::InvalidArgument("input and filter_sizes must have the same "
+ "number of channels. Got ",
+ input_shape.dim_size(4), " for input and ",
+ filter_shape.dim_size(3), " for filter_sizes"));
+ OP_REQUIRES(
+ context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4),
+ errors::InvalidArgument("out_backprop and filter_sizes must have the "
+ "same number of channels. Got ",
+ out_backprop_shape.dim_size(4),
+ " for out_backprop and ",
+ filter_shape.dim_size(4), " for filter_sizes"));
+
ConvBackpropDimensions dims;
OP_REQUIRES_OK(context, ConvBackpropComputeDimensions(
"Conv3DBackpropInputOp", /*num_spatial_dims=*/3,
@@ -412,6 +456,11 @@ class Conv3DCustomBackpropInputOp : public OpKernel {
// contraction compared to sharding and matmuls.
const bool use_parallel_contraction = dims.batch_size == 1;
+ OP_REQUIRES(
+ context, work_unit_size > 0,
+ errors::InvalidArgument("input, filter_sizes and out_backprop tensors "
+ "must all have at least 1 element"));
+
const size_t shard_size =
use_parallel_contraction
? 1
@@ -692,6 +741,28 @@ class Conv3DBackpropFilterOp : public OpKernel {
filter_shape = context->input(1).shape();
}
+ OP_REQUIRES(context, input_shape.dims() == 5,
+ errors::InvalidArgument("input tensor must have 5 dimensions"));
+ OP_REQUIRES(
+ context, filter_shape.dims() == 5,
+ errors::InvalidArgument("filter_sizes tensor must have 5 dimensions"));
+ OP_REQUIRES(
+ context, out_backprop_shape.dims() == 5,
+ errors::InvalidArgument("out_backprop tensor must have 5 dimensions"));
+ OP_REQUIRES(
+ context, input_shape.dim_size(4) == filter_shape.dim_size(3),
+ errors::InvalidArgument("input and filter_sizes must have the same "
+ "number of channels. Got ",
+ input_shape.dim_size(4), " for input and ",
+ filter_shape.dim_size(3), " for filter_sizes"));
+ OP_REQUIRES(
+ context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4),
+ errors::InvalidArgument("out_backprop and filter_sizes must have the "
+ "same number of channels. Got ",
+ out_backprop_shape.dim_size(4),
+ " for out_backprop and ",
+ filter_shape.dim_size(4), " for filter_sizes"));
+
ConvBackpropDimensions dims;
OP_REQUIRES_OK(context,
ConvBackpropComputeDimensions(
@@ -804,6 +875,28 @@ class Conv3DCustomBackpropFilterOp : public OpKernel {
filter_shape = context->input(1).shape();
}
+ OP_REQUIRES(context, input_shape.dims() == 5,
+ errors::InvalidArgument("input tensor must have 5 dimensions"));
+ OP_REQUIRES(
+ context, filter_shape.dims() == 5,
+ errors::InvalidArgument("filter_sizes tensor must have 5 dimensions"));
+ OP_REQUIRES(
+ context, out_backprop_shape.dims() == 5,
+ errors::InvalidArgument("out_backprop tensor must have 5 dimensions"));
+ OP_REQUIRES(
+ context, input_shape.dim_size(4) == filter_shape.dim_size(3),
+ errors::InvalidArgument("input and filter_sizes must have the same "
+ "number of channels. Got ",
+ input_shape.dim_size(4), " for input and ",
+ filter_shape.dim_size(3), " for filter_sizes"));
+ OP_REQUIRES(
+ context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4),
+ errors::InvalidArgument("out_backprop and filter_sizes must have the "
+ "same number of channels. Got ",
+ out_backprop_shape.dim_size(4),
+ " for out_backprop and ",
+ filter_shape.dim_size(4), " for filter_sizes"));
+
ConvBackpropDimensions dims;
OP_REQUIRES_OK(context,
ConvBackpropComputeDimensions(
@@ -876,6 +969,11 @@ class Conv3DCustomBackpropFilterOp : public OpKernel {
const int64 work_unit_size = size_A + size_B + size_C;
+ OP_REQUIRES(
+ context, work_unit_size > 0,
+ errors::InvalidArgument("input, filter_sizes and out_backprop tensors "
+ "must all have at least 1 element"));
+
const size_t shard_size =
(target_working_set_size + work_unit_size - 1) / work_unit_size;
diff --git a/tensorflow/core/kernels/conv_grad_shape_utils.cc b/tensorflow/core/kernels/conv_grad_shape_utils.cc
index 7857257658fcff..ce90e866342311 100644
--- a/tensorflow/core/kernels/conv_grad_shape_utils.cc
+++ b/tensorflow/core/kernels/conv_grad_shape_utils.cc
@@ -126,6 +126,10 @@ Status ConvBackpropComputeDimensionsV2(
// dimensions of the filter Tensor.
VLOG(2) << "input vs filter_in depth " << dims->in_depth << " "
<< filter_shape.dim_size(num_dims - 2);
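+ // A non-positive filter depth would make the divisibility check below
+ // divide by zero.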
+ if (filter_shape.dim_size(num_dims - 2) <= 0) {
+ return errors::InvalidArgument(
+ label, ": filter depth must be strictly greater than zero");
+ }
if (dims->in_depth % filter_shape.dim_size(num_dims - 2)) {
return errors::InvalidArgument(
label, ": input depth must be evenly divisible by filter depth");
diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index d5ce7de1d2572a..cb710975cc48be 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -425,6 +425,9 @@ Status ComputeConv2DDimension(const Conv2DParameters& params,
errors::InvalidArgument("Patch depth too large"));
const int in_depth = static_cast<int>(in_depth_raw);
const int patch_depth = static_cast<int>(patch_depth_raw);
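+ // patch_depth is the divisor in the check below, so it must be strictly
+ // positive.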
+ TF_REQUIRES(patch_depth > 0,
+ errors::InvalidArgument(
+ "filter depth must be stricly positive, got ", patch_depth));
TF_REQUIRES(in_depth % patch_depth == 0,
errors::InvalidArgument(
"input depth must be evenly divisible by filter depth: ",
diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
index f0b9bf12e8e7ff..4cb96e21879f81 100644
--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
@@ -67,6 +67,11 @@ struct LaunchConvOp {
errors::InvalidArgument("CPU implementation of Conv3D "
"currently only supports dilated rates "
"of 1."));
+ OP_REQUIRES(context, filter.dim_size(3) == input.dim_size(input.dims() - 1),
+ errors::InvalidArgument(
+ "Number of channels in filter (", filter.dim_size(3),
+ ") must match last dimension of input (",
+ input.dim_size(input.dims() - 1), ")"));
functor::CuboidConvolution<CPUDevice, T>()(
    context->eigen_device<CPUDevice>(), output->tensor<T, 5>(),
    input.tensor<T, 5>(), filter.tensor<T, 5>(), strides[2], strides[1],
@@ -140,6 +145,8 @@ class Conv3DOp : public BinaryOp {
const int64 filter_depth = filter.dim_size(3);
const int64 out_depth = filter.dim_size(4);
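+ // filter_depth divides in_depth in the check below, so it must be
+ // non-zero.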
+ OP_REQUIRES(context, filter_depth != 0,
+ errors::InvalidArgument("filter_depth must be non-zero"));
OP_REQUIRES(context, in_depth % filter_depth == 0,
errors::InvalidArgument(
"Input depth must be evenly divisible by filter depth: ",
diff --git a/tensorflow/core/kernels/ctc_decoder_ops.cc b/tensorflow/core/kernels/ctc_decoder_ops.cc
index 517612eecb6057..60b10107537408 100644
--- a/tensorflow/core/kernels/ctc_decoder_ops.cc
+++ b/tensorflow/core/kernels/ctc_decoder_ops.cc
@@ -70,6 +70,9 @@ class CTCDecodeHelper {
if (inputs_shape.dims() != 3) {
return errors::InvalidArgument("inputs is not a 3-Tensor");
}
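+ // The decode helper requires a non-empty inputs tensor.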
+ if (inputs_shape.num_elements() == 0) {
+ return errors::InvalidArgument("inputs must not be empty");
+ }
const int64 max_time = inputs_shape.dim_size(0);
const int64 batch_size = inputs_shape.dim_size(1);
@@ -224,6 +227,8 @@ class CTCGreedyDecoderOp : public OpKernel {
int prev_indices = -1;
for (int t = 0; t < seq_len_t(b); ++t) {
int max_class_indices;
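+ // RowMax reduces over the class dimension, so each timestep must have
+ // at least one class.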
+ OP_REQUIRES(ctx, input_list_t[t].dimension(1) > 0,
+ errors::InvalidArgument("Invalid input dimensions."));
log_prob_t(b, 0) +=
-RowMax<T>(input_list_t[t], b, &max_class_indices);
if (max_class_indices != blank_index &&
diff --git a/tensorflow/core/kernels/ctc_loss_op.cc b/tensorflow/core/kernels/ctc_loss_op.cc
index 995d28a158cfa0..b24a53a188fdbf 100644
--- a/tensorflow/core/kernels/ctc_loss_op.cc
+++ b/tensorflow/core/kernels/ctc_loss_op.cc
@@ -62,11 +62,18 @@ class CTCLossOp : public OpKernel {
errors::InvalidArgument("sequence_length is not a vector"));
OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(labels_indices->shape()),
errors::InvalidArgument("labels_indices is not a matrix"));
+ OP_REQUIRES(ctx, labels_indices->dim_size(1) > 1,
+ errors::InvalidArgument(
+ "labels_indices second dimension must be >= 1. Received ",
+ labels_indices->dim_size(1)));
OP_REQUIRES(ctx, TensorShapeUtils::IsVector(labels_values->shape()),
errors::InvalidArgument("labels_values is not a vector"));
const TensorShape& inputs_shape = inputs->shape();
const int64 max_time = inputs_shape.dim_size(0);
+ OP_REQUIRES(ctx, max_time != 0,
+ errors::InvalidArgument(
+ "Max time or first dimension of input cannot be 0."));
const int64 batch_size = inputs_shape.dim_size(1);
const int64 num_classes_raw = inputs_shape.dim_size(2);
OP_REQUIRES(
diff --git a/tensorflow/core/kernels/data_format_ops.cc b/tensorflow/core/kernels/data_format_ops.cc
index 0b4241dbb9312c..a51bfc3b17294a 100644
--- a/tensorflow/core/kernels/data_format_ops.cc
+++ b/tensorflow/core/kernels/data_format_ops.cc
@@ -18,16 +18,52 @@ limitations under the License.
#define EIGEN_USE_THREADS
#include "tensorflow/core/kernels/data_format_ops.h"
+
+#include