diff --git a/.github/workflows/create_issue.js b/.github/workflows/create_issue.js deleted file mode 100644 index eca20953f8484a..00000000000000 --- a/.github/workflows/create_issue.js +++ /dev/null @@ -1,64 +0,0 @@ -/** - * @license - * Copyright 2021 Google LLC. All Rights Reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ============================================================================= - */ - -/** Extracts PR from commit message and creates a GitHub Issue on Rollback of PR - Created issue is assigned to original PR owner and reviewer. - - @param {!object} - github enables querying for PR and also create issue using rest endpoint - context has the commit message details in the payload - @return {string} Returns the issue number and title -*/ -module.exports = async ({github, context}) => { - const rollback_commit = context.payload.head_commit.id; - const pr_match_groups = context.payload.head_commit.message.match(/\Rollback of PR #(\d+).*/) || []; - if (pr_match_groups.length != 2) { - console.log(`PR Number not found in ${context.payload.head_commit.message}`); - throw "Error extracting PR Number from commit message"; - } - const pr_number = parseInt(pr_match_groups[1]); - const owner = context.payload.repository.owner.name; - const repo = context.payload.repository.name; - console.log(`Original PR: ${pr_number} and Rollback Commit: ${rollback_commit}`); - // Get the Original PR Details - const pr_resp = await github.rest.pulls.get({ - owner, - repo, - pull_number: pr_number - }); - if (pr_resp.status != 200 || pr_resp.data.state != 'closed') { - console.log(`PR:{pr_number} is not found or closed. Not a valid condition to create an issue.`); - console.log(pr_resp); - throw `PR:{pr_number} needs to be valid and closed (merged)`; - } - const pr_title = pr_resp.data.title; - // Assign to PR owner and reviewers - const assignees = pr_resp.data.assignees.concat(pr_resp.data.requested_reviewers); - let assignee_logins = assignees.map(x => x.login); - assignee_logins.push(pr_resp.data.user.login); - console.log(assignee_logins); - // Create an new GH Issue and reference the Original PR - const resp = await github.rest.issues.create({ - owner, - repo, - assignees: assignee_logins, - title: `Issue created for Rollback of PR #${pr_number}: ${pr_title}`, - body: `Merged PR #${pr_number} is rolled back in ${rollback_commit}. - Please follow up with the reviewer and close this issue once its resolved.` - }); - return `Issue created: ${resp.data.number} with Title: ${resp.data.title}`; -}; diff --git a/.github/workflows/issue-on-pr-rollback.yml b/.github/workflows/issue-on-pr-rollback.yml deleted file mode 100644 index ce0182bedc2937..00000000000000 --- a/.github/workflows/issue-on-pr-rollback.yml +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -name: Creates a GitHub Issue when a PR Rolled back via Commit to Master -on: - push: - branches: - - master - -jobs: - create-issue-on-pr-rollback: - runs-on: ubuntu-latest - if: | - github.repository == 'tensorflow/tensorflow' && - startsWith(github.event.head_commit.message, 'Rollback of PR #') - steps: - - name: Checkout repo - uses: actions/checkout@v2 - - name: Create a new Github Issue - uses: actions/github-script@v5 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - script: | - const script = require('./.github/workflows/create_issue.js') - console.log(await script({github, context})) diff --git a/.github/workflows/pylint-presubmit.yml b/.github/workflows/pylint-presubmit.yml deleted file mode 100644 index b469f047f4d423..00000000000000 --- a/.github/workflows/pylint-presubmit.yml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -name: PyLint -on: - pull_request: - paths: - - '**.py' - -jobs: - build: - name: PyLint - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v2 - - name: Get file changes - id: get_file_changes - uses: trilom/file-changes-action@v1.2.4 - with: - output: ' ' - - name: Report list of changed files - run: | - echo Changed files: ${{ steps.get_file_changes.outputs.files }} - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: "3.9" - - name: Install Python dependencies - run: | - python -m pip install --upgrade pip - pip install pylint numpy wheel - pip install keras_preprocessing --no-deps - - name: Run PyLint on changed files - run: | - echo "${{ steps.get_file_changes.outputs.files}}" | tr " " "\n" | grep ".py$" | xargs pylint --rcfile=tensorflow/tools/ci_build/pylintrc diff --git a/.github/workflows/update-nightly.yml b/.github/workflows/update-nightly.yml deleted file mode 100644 index 0265ffbebe2ec0..00000000000000 --- a/.github/workflows/update-nightly.yml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -on: - workflow_dispatch: # Allow manual triggers - schedule: - - cron: 0 4 * * * # 4am UTC is 9pm PDT and 8pm PST -name: Set nightly branch to master HEAD -jobs: - master-to-nightly: - if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks - runs-on: ubuntu-latest - steps: - - uses: zofrex/mirror-branch@v1 - name: Set nightly branch to master HEAD - with: - target-branch: 'nightly' diff --git a/RELEASE.md b/RELEASE.md index 0e738d5f9e5a49..a9bd22d65f1545 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,133 @@ +# Release 2.8.4 + +This release introduces several vulnerability fixes: + +* Fixes a heap OOB failure in `ThreadUnsafeUnigramCandidateSampler` caused by missing validation ([CVE-2022-41880](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41880)) +* Fixes a segfault in `ndarray_tensor_bridge` ([CVE-2022-41884](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41884)) +* Fixes an overflow in `FusedResizeAndPadConv2D` ([CVE-2022-41885](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41885)) +* Fixes a overflow in `ImageProjectiveTransformV2` ([CVE-2022-41886](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41886)) +* Fixes an FPE in `tf.image.generate_bounding_box_proposals` on GPU ([CVE-2022-41888](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41888)) +* Fixes a segfault in `pywrap_tfe_src` caused by invalid attributes ([CVE-2022-41889](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41889)) +* Fixes a `CHECK` fail in `BCast` ([CVE-2022-41890](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41890)) +* Fixes a segfault in `TensorListConcat` ([CVE-2022-41891](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41891)) +* Fixes a `CHECK_EQ` fail in `TensorListResize` ([CVE-2022-41893](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41893)) +* Fixes an overflow in `CONV_3D_TRANSPOSE` on TFLite ([CVE-2022-41894](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41894)) +* Fixes a heap OOB in `MirrorPadGrad` ([CVE-2022-41895](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41895)) +* Fixes a crash in `Mfcc` ([CVE-2022-41896](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41896)) +* Fixes a heap OOB in `FractionalMaxPoolGrad` ([CVE-2022-41897](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41897)) +* Fixes a `CHECK` fail in `SparseFillEmptyRowsGrad` ([CVE-2022-41898](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41898)) +* Fixes a `CHECK` fail in `SdcaOptimizer` ([CVE-2022-41899](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41899)) +* Fixes a heap OOB in `FractionalAvgPool` and `FractionalMaxPool`([CVE-2022-41900](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41900)) +* Fixes a `CHECK_EQ` in `SparseMatrixNNZ` ([CVE-2022-41901](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41901)) +* Fixes an OOB write in grappler ([CVE-2022-41902](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41902)) +* Fixes a overflow in `ResizeNearestNeighborGrad` 
([CVE-2022-41907](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41907))
+* Fixes a `CHECK` fail in `PyFunc` ([CVE-2022-41908](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41908))
+* Fixes a segfault in `CompositeTensorVariantToComponents` ([CVE-2022-41909](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41909))
+* Fixes an invalid char to bool conversion in printing a tensor ([CVE-2022-41911](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41911))
+* Fixes a heap overflow in `QuantizeAndDequantizeV2` ([CVE-2022-41910](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41910))
+* Fixes a `CHECK` failure in `SobolSample` via missing validation ([CVE-2022-35935](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35935))
+* Fixes a `CHECK` fail in `TensorListScatter` and `TensorListScatterV2` in eager mode ([CVE-2022-35935](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35935))
+
+# Release 2.8.3
+
+This release introduces several vulnerability fixes:
+* Fixes a `CHECK` failure in `tf.reshape` caused by overflows ([CVE-2022-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35934))
+* Fixes a `CHECK` failure in `SobolSample` caused by missing validation ([CVE-2022-35935](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35935))
+* Fixes an OOB read in `Gather_nd` op in TF Lite ([CVE-2022-35937](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35937))
+* Fixes a `CHECK` failure in `TensorListReserve` caused by missing validation ([CVE-2022-35960](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35960))
+* Fixes an OOB write in `Scatter_nd` op in TF Lite ([CVE-2022-35939](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35939))
+* Fixes an integer overflow in `RaggedRangeOp` ([CVE-2022-35940](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35940))
+* Fixes a `CHECK` failure in `AvgPoolOp` ([CVE-2022-35941](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35941))
+* Fixes a `CHECK` failure in `UnbatchGradOp` ([CVE-2022-35952](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35952))
+* Fixes a segfault in the TFLite converter on per-channel quantized transposed convolutions ([CVE-2022-36027](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36027))
+* Fixes a `CHECK` failure in `AvgPool3DGrad` ([CVE-2022-35959](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35959))
+* Fixes a `CHECK` failure in `FractionalAvgPoolGrad` ([CVE-2022-35963](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35963))
+* Fixes a segfault in `BlockLSTMGradV2` ([CVE-2022-35964](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35964))
+* Fixes a segfault in `LowerBound` and `UpperBound` ([CVE-2022-35965](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35965))
+* Fixes a segfault in `QuantizedAvgPool` ([CVE-2022-35966](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35966))
+* Fixes a segfault in `QuantizedAdd` ([CVE-2022-35967](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35967))
+* Fixes a `CHECK` fail in `AvgPoolGrad` ([CVE-2022-35968](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35968))
+* Fixes a `CHECK` fail in `Conv2DBackpropInput` ([CVE-2022-35969](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35969))
+* Fixes a segfault in `QuantizedInstanceNorm` ([CVE-2022-35970](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35970))
+* Fixes a `CHECK` fail in `FakeQuantWithMinMaxVars` ([CVE-2022-35971](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35971))
+* Fixes a segfault in `Requantize` ([CVE-2022-36017](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36017))
+* Fixes a segfault in `QuantizedBiasAdd` ([CVE-2022-35972](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35972))
+* Fixes a `CHECK` fail in `FakeQuantWithMinMaxVarsPerChannel` ([CVE-2022-36019](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36019))
+* Fixes a segfault in `QuantizedMatMul` ([CVE-2022-35973](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35973))
+* Fixes a segfault in `QuantizeDownAndShrinkRange` ([CVE-2022-35974](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35974))
+* Fixes segfaults in `QuantizedRelu` and `QuantizedRelu6` ([CVE-2022-35979](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35979))
+* Fixes a `CHECK` fail in `FractionalMaxPoolGrad` ([CVE-2022-35981](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35981))
+* Fixes a `CHECK` fail in `RaggedTensorToVariant` ([CVE-2022-36018](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36018))
+* Fixes a `CHECK` fail in `QuantizeAndDequantizeV3` ([CVE-2022-36026](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36026))
+* Fixes a segfault in `SparseBincount` ([CVE-2022-35982](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35982))
+* Fixes a `CHECK` fail in `Save` and `SaveSlices` ([CVE-2022-35983](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35983))
+* Fixes a `CHECK` fail in `ParameterizedTruncatedNormal` ([CVE-2022-35984](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35984))
+* Fixes a `CHECK` fail in `LRNGrad` ([CVE-2022-35985](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35985))
+* Fixes a segfault in `RaggedBincount` ([CVE-2022-35986](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35986))
+* Fixes a `CHECK` fail in `DenseBincount` ([CVE-2022-35987](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35987))
+* Fixes a `CHECK` fail in `tf.linalg.matrix_rank` ([CVE-2022-35988](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35988))
+* Fixes a `CHECK` fail in `MaxPool` ([CVE-2022-35989](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35989))
+* Fixes a `CHECK` fail in `Conv2DBackpropInput` ([CVE-2022-35999](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35999))
+* Fixes a `CHECK` fail in `EmptyTensorList` ([CVE-2022-35998](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35998))
+* Fixes a `CHECK` fail in `tf.sparse.cross` ([CVE-2022-35997](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35997))
+* Fixes a floating point exception in `Conv2D` ([CVE-2022-35996](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35996))
+* Fixes a `CHECK` fail in `AudioSummaryV2` ([CVE-2022-35995](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35995))
+* Fixes a `CHECK` fail in `CollectiveGather` ([CVE-2022-35994](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35994))
+* Fixes a `CHECK` fail in `SetSize` ([CVE-2022-35993](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35993))
+* Fixes a `CHECK` fail in `TensorListFromTensor` ([CVE-2022-35992](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35992))
+* Fixes a `CHECK` fail in `TensorListScatter` and `TensorListScatterV2` ([CVE-2022-35991](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35991))
+* Fixes a `CHECK` fail in `FakeQuantWithMinMaxVarsPerChannelGradient` ([CVE-2022-35990](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35990))
+* Fixes a `CHECK` fail in `FakeQuantWithMinMaxVarsGradient` ([CVE-2022-36005](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36005))
+* Fixes a `CHECK` fail in `tf.random.gamma` ([CVE-2022-36004](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36004))
+* Fixes a `CHECK` fail in `RandomPoissonV2` ([CVE-2022-36003](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36003))
+* Fixes a `CHECK` fail in `Unbatch` ([CVE-2022-36002](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36002))
+* Fixes a `CHECK` fail in `DrawBoundingBoxes` ([CVE-2022-36001](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36001))
+* Fixes a `CHECK` fail in `Eig` ([CVE-2022-36000](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36000))
+* Fixes a null dereference in MLIR on empty function attributes ([CVE-2022-36011](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36011))
+* Fixes an assertion failure in MLIR on empty edge names ([CVE-2022-36012](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36012))
+* Fixes a null-dereference in `mlir::tfg::GraphDefImporter::ConvertNodeDef` ([CVE-2022-36013](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36013))
+* Fixes a null-dereference in `mlir::tfg::TFOp::nameAttr` ([CVE-2022-36014](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36014))
+* Fixes an integer overflow in math ops ([CVE-2022-36015](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36015))
+* Fixes a `CHECK`-fail in `tensorflow::full_type::SubstituteFromAttrs` ([CVE-2022-36016](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36016))
+* Fixes an OOB read in `Gather_nd` op in TF Lite Micro ([CVE-2022-35938](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35938))
+
+# Release 2.8.2
+
+Add an upper bound for `protobuf` in `setup.py` since `protobuf` after version 3.20 is currently incompatible with TensorFlow. See https://github.com/tensorflow/tensorflow/issues/53234, https://github.com/protocolbuffers/protobuf/issues/9954 and https://github.com/tensorflow/tensorflow/issues/56077.
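The 2.8.2 note above describes adding a `protobuf` upper bound in `setup.py`. As a minimal sketch of what such a pin looks like (the exact requirement string, lower bound, and surrounding project metadata below are illustrative assumptions, not taken from this patch):

```python
# Hypothetical setup.py fragment showing a protobuf upper-bound pin.
# The lower bound and package metadata are placeholders; the release note
# only states that protobuf was capped below the incompatible 3.20 series.
from setuptools import setup

setup(
    name="example-package",  # placeholder project name
    version="0.0.1",
    install_requires=[
        "protobuf >= 3.9.2, < 3.20",  # keep protobuf below 3.20
    ],
)
```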
+
+# Release 2.8.1
+
+This release introduces several vulnerability fixes:
+
+* Fixes a code injection in `saved_model_cli` ([CVE-2022-29216](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29216))
+* Fixes a missing validation which causes `TensorSummaryV2` to crash ([CVE-2022-29193](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29193))
+* Fixes a missing validation which crashes `QuantizeAndDequantizeV4Grad` ([CVE-2022-29192](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29192))
+* Fixes a missing validation which causes denial of service via `DeleteSessionTensor` ([CVE-2022-29194](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29194))
+* Fixes a missing validation which causes denial of service via `GetSessionTensor` ([CVE-2022-29191](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29191))
+* Fixes a missing validation which causes denial of service via `StagePeek` ([CVE-2022-29195](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29195))
+* Fixes a missing validation which causes denial of service via `UnsortedSegmentJoin` ([CVE-2022-29197](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29197))
+* Fixes a missing validation which causes denial of service via `LoadAndRemapMatrix` ([CVE-2022-29199](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29199))
+* Fixes a missing validation which causes denial of service via `SparseTensorToCSRSparseMatrix` ([CVE-2022-29198](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29198))
+* Fixes a missing validation which causes denial of service via `LSTMBlockCell` ([CVE-2022-29200](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29200))
+* Fixes a missing validation which causes denial of service via `Conv3DBackpropFilterV2` ([CVE-2022-29196](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29196))
+* Fixes a `CHECK` failure in depthwise ops via overflows ([CVE-2021-41197](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-41197))
+* Fixes issues arising from undefined behavior stemming from users supplying invalid resource handles ([CVE-2022-29207](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29207))
+* Fixes a segfault due to missing support for quantized types ([CVE-2022-29205](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29205))
+* Fixes a missing validation which results in undefined behavior in `SparseTensorDenseAdd` ([CVE-2022-29206](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29206))
+* Fixes a missing validation which results in undefined behavior in `QuantizedConv2D` ([CVE-2022-29201](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29201))
+* Fixes an integer overflow in `SpaceToBatchND` ([CVE-2022-29203](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29203))
+* Fixes a segfault and OOB write due to incomplete validation in `EditDistance` ([CVE-2022-29208](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29208))
+* Fixes a missing validation which causes denial of service via `Conv3DBackpropFilterV2` ([CVE-2022-29204](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29204))
+* Fixes a denial of service in `tf.ragged.constant` due to lack of validation ([CVE-2022-29202](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29202))
+* Fixes a segfault when `tf.histogram_fixed_width` is called with NaN values ([CVE-2022-29211](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29211))
+* Fixes a core dump when loading TFLite models with quantization
([CVE-2022-29212](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29212)) +* Fixes crashes stemming from incomplete validation in signal ops ([CVE-2022-29213](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29213)) +* Fixes a type confusion leading to `CHECK`-failure based denial of service ([CVE-2022-29209](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29209)) +* Fixes a heap buffer overflow due to incorrect hash function ([CVE-2022-29210](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-29210)) +* Updates `curl` to `7.83.1` to handle ([CVE-2022-22576](https://cve.mitre.org/cgi-bin/cvename.cgi?name=VE-2022-22576), ([CVE-2022-27774](https://cve.mitre.org/cgi-bin/cvename.cgi?name=VE-2022-27774), ([CVE-2022-27775](https://cve.mitre.org/cgi-bin/cvename.cgi?name=VE-2022-27775), ([CVE-2022-27776](https://cve.mitre.org/cgi-bin/cvename.cgi?name=VE-2022-27776), ([CVE-2022-27778](https://cve.mitre.org/cgi-bin/cvename.cgi?name=VE-2022-27778), ([CVE-2022-27779](https://cve.mitre.org/cgi-bin/cvename.cgi?name=VE-2022-27779), ([CVE-2022-27780](https://cve.mitre.org/cgi-bin/cvename.cgi?name=VE-2022-27780), ([CVE-2022-27781](https://cve.mitre.org/cgi-bin/cvename.cgi?name=VE-2022-27781), ([CVE-2022-27782](https://cve.mitre.org/cgi-bin/cvename.cgi?name=VE-2022-27782) and ([CVE-2022-30115](https://cve.mitre.org/cgi-bin/cvename.cgi?name=VE-2022-30115) +* Updates `zlib` to `1.2.12` after `1.2.11` was pulled due to [security issue](https://www.openwall.com/lists/oss-security/2022/03/28/1) + + # Release 2.8.0 ## Major Features and Improvements diff --git a/tensorflow/compiler/tests/spacetobatch_op_test.py b/tensorflow/compiler/tests/spacetobatch_op_test.py index bb3d4b1812080c..016c05f11e0c2a 100644 --- a/tensorflow/compiler/tests/spacetobatch_op_test.py +++ b/tensorflow/compiler/tests/spacetobatch_op_test.py @@ -17,6 +17,7 @@ import numpy as np from tensorflow.compiler.tests import xla_test +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_array_ops @@ -145,6 +146,29 @@ def testLargerInputBatch2x2(self): self._testOne(x_np, block_size, x_out) +class SpaceToBatchNDErrorHandlingTest(xla_test.XLATestCase): + + def testInvalidBlockShape(self): + with self.assertRaisesRegex(ValueError, "block_shape must be positive"): + with self.session() as sess, self.test_scope(): + tf_in = constant_op.constant( + -3.5e+35, shape=[10, 20, 20], dtype=dtypes.float32) + block_shape = constant_op.constant(-10, shape=[2], dtype=dtypes.int64) + paddings = constant_op.constant(0, shape=[2, 2], dtype=dtypes.int32) + sess.run(array_ops.space_to_batch_nd(tf_in, block_shape, paddings)) + + def testOutputSizeOutOfBounds(self): + with self.assertRaisesRegex(ValueError, + "Negative.* dimension size caused by overflow"): + with self.session() as sess, self.test_scope(): + tf_in = constant_op.constant( + -3.5e+35, shape=[10, 19, 22], dtype=dtypes.float32) + block_shape = constant_op.constant( + 1879048192, shape=[2], dtype=dtypes.int64) + paddings = constant_op.constant(0, shape=[2, 2], dtype=dtypes.int32) + sess.run(array_ops.space_to_batch_nd(tf_in, block_shape, paddings)) + + class SpaceToBatchNDTest(xla_test.XLATestCase): """Tests input-output pairs for the SpaceToBatchND and BatchToSpaceND ops.""" diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 06b415a6763679..28c8f7f3781eae 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ 
b/tensorflow/compiler/tf2xla/BUILD @@ -382,6 +382,7 @@ cc_library( "//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client:xla_computation", "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/core/util:overflow", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 480db6d848525d..f3fbc2b171b1d7 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -207,6 +207,7 @@ tf_kernel_library( "//tensorflow/core/kernels:stateful_random_ops_header", "//tensorflow/core/kernels:stateless_random_ops_v2_header", "//tensorflow/core/tpu:tpu_defs", + "//tensorflow/core/util:overflow", "//tensorflow/stream_executor/lib", "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:flat_hash_map", diff --git a/tensorflow/compiler/tf2xla/kernels/spacetobatch_op.cc b/tensorflow/compiler/tf2xla/kernels/spacetobatch_op.cc index a4e9aec1c97058..d6e38f1309f91c 100644 --- a/tensorflow/compiler/tf2xla/kernels/spacetobatch_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/spacetobatch_op.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/compiler/xla/client/xla_builder.h" +#include "tensorflow/core/util/overflow.h" namespace tensorflow { namespace { @@ -60,10 +61,14 @@ void SpaceToBatch(XlaOpKernelContext* ctx, const xla::XlaOp& input, int64_t pad_end = paddings.Get({i, 1}); OP_REQUIRES(ctx, pad_start >= 0 && pad_end >= 0, errors::InvalidArgument("Paddings must be non-negative")); + OP_REQUIRES(ctx, block_shape[i] >= 1, + errors::InvalidArgument( + "All values in block_shape must be positive, got value, ", + block_shape[i], " at index ", i, ".")); dim->set_edge_padding_low(pad_start); dim->set_edge_padding_high(pad_end); padded_shape[1 + i] += pad_start + pad_end; - block_num_elems *= block_shape[i]; + block_num_elems = MultiplyWithoutOverflow(block_num_elems, block_shape[i]); } // Don't pad the remainder dimensions. for (int i = 0; i < remainder_shape.size(); ++i) { @@ -72,6 +77,16 @@ void SpaceToBatch(XlaOpKernelContext* ctx, const xla::XlaOp& input, OP_REQUIRES(ctx, block_num_elems > 0, errors::InvalidArgument( "The product of the block dimensions must be positive")); + const int64_t batch_size = input_shape[0]; + const int64_t output_dim = + MultiplyWithoutOverflow(batch_size, block_num_elems); + if (output_dim < 0) { + OP_REQUIRES( + ctx, output_dim >= 0, + errors::InvalidArgument("Negative output dimension size caused by " + "overflow when multiplying ", + batch_size, " and ", block_num_elems)); + } xla::XlaOp padded = xla::Pad(input, XlaHelpers::Zero(b, input_dtype), padding_config); @@ -85,7 +100,6 @@ void SpaceToBatch(XlaOpKernelContext* ctx, const xla::XlaOp& input, // padded_shape[M] / block_shape[M-1], // block_shape[M-1]] + // remaining_shape - const int64_t batch_size = input_shape[0]; std::vector reshaped_padded_shape(input_rank + block_rank); reshaped_padded_shape[0] = batch_size; for (int i = 0; i < block_rank; ++i) { @@ -134,7 +148,7 @@ void SpaceToBatch(XlaOpKernelContext* ctx, const xla::XlaOp& input, // Determine the length of the prefix of block dims that can be combined // into the batch dimension due to having no padding and block_shape=1. 
std::vector output_shape(input_rank); - output_shape[0] = batch_size * block_num_elems; + output_shape[0] = output_dim; for (int i = 0; i < block_rank; ++i) { output_shape[1 + i] = padded_shape[1 + i] / block_shape[i]; } diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index 5dee6dda09b7dc..1c70d1af88fb5d 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/core/common_runtime/dma_helper.h" #include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/util/overflow.h" namespace tensorflow { @@ -443,6 +444,16 @@ Status XlaOpKernelContext::ConstantInputAsShape(int index, TensorShape* shape, TF_RETURN_IF_ERROR(ConstantInput(index, &literal, mode)); std::vector dims; TF_RETURN_IF_ERROR(LiteralToInt64Vector(literal, &dims)); + + int64_t num_elements = 1; + for (auto i = dims.begin(); i != dims.end(); ++i) { + num_elements = MultiplyWithoutOverflow(num_elements, *i); + if (num_elements < 0) + return errors::InvalidArgument( + "The total elements specified by orig_input_shape is too large.", + "Encountered overflow after multiplying", *i, + ", result: ", num_elements); + } *shape = TensorShape(dims); return Status::OK(); } diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index 594c0352086305..4ad2e2d60aa2af 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -303,6 +303,9 @@ Status GetDeviceForInput(const EagerContext& ctx, TensorHandle* tensor_handle, const Tensor* tensor; // TODO(fishx): Avoid blocking here. 
TF_RETURN_IF_ERROR(tensor_handle->Tensor(&tensor)); + if (tensor->NumElements() == 0) { + return errors::InvalidArgument("Empty resource handle"); + } const ResourceHandle& handle = tensor->flat()(0); device_name = handle.device(); diff --git a/tensorflow/core/framework/BUILD b/tensorflow/core/framework/BUILD index 08ab6d90833c1b..bc6c36feb1a9f6 100644 --- a/tensorflow/core/framework/BUILD +++ b/tensorflow/core/framework/BUILD @@ -883,6 +883,7 @@ cc_library( "//tensorflow/core/lib/strings:scanner", "//tensorflow/core/lib/strings:str_util", "//tensorflow/core/platform:macros", + "//tensorflow/core/util:overflow", "@com_google_absl//absl/memory", ], ) diff --git a/tensorflow/core/framework/full_type_util.cc b/tensorflow/core/framework/full_type_util.cc index b69c6c38e9e6ea..09d096bd8d218b 100644 --- a/tensorflow/core/framework/full_type_util.cc +++ b/tensorflow/core/framework/full_type_util.cc @@ -174,7 +174,11 @@ Status SubstituteVar(AttrMap& attrs, FullTypeDef& t) { } Status SubstituteForEach(AttrMap& attrs, FullTypeDef& t) { - DCHECK_EQ(t.args_size(), 3); + if (t.args_size() != 3) { + return Status(error::INVALID_ARGUMENT, + absl::StrCat("illegal FOR_EACH type, expected 3 args, got ", + t.args_size())); + } const auto& cont = t.args(0); const auto& tmpl = t.args(1); diff --git a/tensorflow/core/framework/full_type_util_test.cc b/tensorflow/core/framework/full_type_util_test.cc index 16477f0e5e3aef..ab5bacba641fcb 100644 --- a/tensorflow/core/framework/full_type_util_test.cc +++ b/tensorflow/core/framework/full_type_util_test.cc @@ -491,6 +491,19 @@ TEST(SpecializeType, ForEachOverridesTargetOfNestedForEach) { EXPECT_EQ(t_actual.args(1).args(0).args(0).args_size(), 0); } +TEST(SpecializeType, ForEachRejectsMalformedInput) { + OpDef op; + FullTypeDef* t = op.add_output_arg()->mutable_experimental_full_type(); + t->set_type_id(TFT_FOR_EACH); + t->add_args()->set_type_id(TFT_PRODUCT); + + NodeDef ndef; + AttrSlice attrs(ndef); + + FullTypeDef ft; + EXPECT_FALSE(SpecializeType(attrs, op, ft).ok()); +} + TEST(GetArgDefaults, DefaultUnsetFromNoArgs) { FullTypeDef t; diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 73a985810ce27b..7bd0567fb3bfc5 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/scanner.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/util/overflow.h" namespace tensorflow { namespace shape_inference { @@ -1098,7 +1099,7 @@ Status InferenceContext::Multiply(DimensionHandle first, *out = UnknownDim(); } else { // Invariant: Both values are known and greater than 1. - const int64_t product = first_value * second_value; + const int64_t product = MultiplyWithoutOverflow(first_value, second_value); if (product < 0) { return errors::InvalidArgument( "Negative dimension size caused by overflow when multiplying ", diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index c7a08ee0808043..7400651425bbb4 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -29,6 +29,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" +#include #include #include "absl/strings/escaping.h" @@ -1176,12 +1177,10 @@ void PrintOneDimV2(int dim_index, const gtl::InlinedVector& shape, } template -string SummarizeArray(int64_t limit, int64_t num_elts, - const TensorShape& tensor_shape, const char* data, - const bool print_v2) { +string SummarizeArrayInternal(int64_t limit, int64_t num_elts, + const TensorShape& tensor_shape, const T* array, + const bool print_v2) { string ret; - const T* array = reinterpret_cast(data); - const gtl::InlinedVector shape = tensor_shape.dim_sizes(); if (shape.empty()) { for (int64_t i = 0; i < limit; ++i) { @@ -1204,6 +1203,29 @@ string SummarizeArray(int64_t limit, int64_t num_elts, return ret; } + +template +string SummarizeArray(int64_t limit, int64_t num_elts, + const TensorShape& tensor_shape, const char* data, + const bool print_v2) { + const T* array = reinterpret_cast(data); + return SummarizeArrayInternal(limit, num_elts, tensor_shape, array, + print_v2); +} + +template <> +string SummarizeArray(int64_t limit, int64_t num_elts, + const TensorShape& tensor_shape, const char* data, + const bool print_v2) { + // We first convert all chars to be 0/1 to not get InvalidEnumValue sanitizer + // error + auto mutable_data = std::unique_ptr(new char[num_elts]); + for (int64_t i = 0; i < num_elts; ++i) + mutable_data.get()[i] = data[i] ? 1 : 0; + bool* array = reinterpret_cast(mutable_data.get()); + return SummarizeArrayInternal(limit, num_elts, tensor_shape, array, + print_v2); +} } // namespace string Tensor::SummarizeValue(int64_t max_entries, bool print_v2) const { diff --git a/tensorflow/core/framework/tensor_key.h b/tensorflow/core/framework/tensor_key.h index 243205dc50f838..3bde6fce6214c7 100644 --- a/tensorflow/core/framework/tensor_key.h +++ b/tensorflow/core/framework/tensor_key.h @@ -16,6 +16,7 @@ limitations under the License. #define TENSORFLOW_CORE_FRAMEWORK_TENSOR_KEY_H_ #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" namespace tensorflow { @@ -32,8 +33,7 @@ class TensorKey : public Tensor { } if (DataTypeCanUseMemcpy(t1.dtype())) { return t1.tensor_data() == t2.tensor_data(); - } - if (t1.dtype() == DT_STRING) { + } else if (t1.dtype() == DT_STRING) { const auto s1 = t1.unaligned_flat(); const auto s2 = t2.unaligned_flat(); for (int64_t i = 0, n = t1.NumElements(); i < n; ++i) { @@ -42,6 +42,9 @@ class TensorKey : public Tensor { } } return true; + } else { + DCHECK(false) << "Unimplemented dtype " << DataTypeString(t1.dtype()) + << std::endl; } return false; } @@ -53,14 +56,19 @@ class TensorKey : public Tensor { // Needed for absl hash function. 
template friend H AbslHashValue(H h, const TensorKey& k) { - const uint8* d = static_cast(k.data()); - size_t s = k.AllocatedBytes(); - std::vector vec; - vec.reserve(s); - for (int i = 0; i < s; i++) { - vec.push_back(d[i]); + if (DataTypeCanUseMemcpy(k.dtype())) { + return H::combine(std::move(h), k.tensor_data()); + } else if (k.dtype() == DT_STRING) { + const auto strs = k.unaligned_flat(); + for (int64_t i = 0, n = k.NumElements(); i < n; ++i) { + h = H::combine(std::move(h), strs(i)); + } + return h; + } else { + DCHECK(false) << "Unimplemented dtype " << DataTypeString(k.dtype()) + << std::endl; } - return H::combine(std::move(h), s); + return h; } }; diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc index 4b647284f2955f..2f9f53fb44cd50 100644 --- a/tensorflow/core/grappler/utils/functions.cc +++ b/tensorflow/core/grappler/utils/functions.cc @@ -291,6 +291,11 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func, std::vector arg_attr(inputs.size(), nullptr); for (const auto& attr : func.arg_attr()) { + if (attr.first >= inputs.size()) { + return errors::InvalidArgument("Invalid attribute index, got ", + attr.first, " but expected less than ", + inputs.size()); + } arg_attr.at(attr.first) = &attr.second; } diff --git a/tensorflow/core/ir/importexport/functiondef_import.cc b/tensorflow/core/ir/importexport/functiondef_import.cc index ad63e805dded4d..3400f16c0d2317 100644 --- a/tensorflow/core/ir/importexport/functiondef_import.cc +++ b/tensorflow/core/ir/importexport/functiondef_import.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/status.h" #include "tensorflow/core/protobuf/graph_debug_info.pb.h" +#include "tensorflow/core/platform/statusor.h" using tensorflow::AttrValue; using tensorflow::FunctionDef; @@ -43,6 +44,7 @@ using tensorflow::NodeDef; using tensorflow::OpDef; using tensorflow::OpDef_AttrDef; using tensorflow::Status; +using tensorflow::StatusOr; using tensorflow::errors::InvalidArgument; using tensorflow::protobuf::RepeatedPtrField; @@ -168,9 +170,12 @@ Status ImportNodes(ValueMapManager value_manager, if (node.op().empty()) return InvalidArgument("empty op type"); OperationState state(unknown_loc, absl::StrCat("tfg.", node.op())); // Fetch the inputs, creating placeholder if an input hasn't been visited. - for (const std::string& input : node.input()) + for (const std::string& input : node.input()) { + if (input.empty()) + return InvalidArgument("Node '", node.name(), "' has an empty input"); state.operands.push_back( value_manager.GetValueOrCreatePlaceholder(input)); + } // Retrieve the entry in the nodes_map for this node and infer the result // count from what was inferred during the first traversal above. state.types.push_back(placeholder_ty); @@ -324,6 +329,8 @@ Status ImportGenericFunction( // Import the function attributes with a `tf.` prefix to match the current // infrastructure expectations. for (const auto& namedAttr : func.attr()) { + if (namedAttr.first.empty()) + return InvalidArgument("Invalid function attribute name"); const std::string& name = "tf." 
+ namedAttr.first; const AttrValue& tf_attr = namedAttr.second; TF_ASSIGN_OR_RETURN(Attribute attr, @@ -448,21 +455,31 @@ Status ImportGenericFunction( ret_vals.resize(func.ret_size() + func.control_ret_size(), Value()); for (const auto& ret_val : func.ret()) { auto position = output_name_to_position.find(ret_val.first); - if (position == output_name_to_position.end()) + if (position == output_name_to_position.end()) { return InvalidArgument( "Can't import function, returned value references unknown output " "argument ", ret_val.first); + } + if (ret_val.second.empty()) { + return InvalidArgument("Function '", func.signature().name(), + "' has empty result name"); + } ret_vals[position->second] = value_manager.GetValueOrCreatePlaceholder(ret_val.second); } for (const auto& ret_val : func.control_ret()) { auto position = control_output_to_position.find(ret_val.first); - if (position == control_output_to_position.end()) + if (position == control_output_to_position.end()) { return InvalidArgument( "Can't import function, returned value references unknown output " "argument ", ret_val.first); + } + if (ret_val.second.empty()) { + return InvalidArgument("Function '", func.signature().name(), + "' has empty control result name"); + } Value result = value_manager.GetValueOrCreatePlaceholder( (Twine("^") + ret_val.second).str()); if (!result.getType().isa()) diff --git a/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_control_result.pbtxt b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_control_result.pbtxt new file mode 100644 index 00000000000000..b7d82f87842dc7 --- /dev/null +++ b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_control_result.pbtxt @@ -0,0 +1,26 @@ +# RUN: not tfg-translate -graphdef-to-mlir %s 2>&1 | FileCheck %s + +# CHECK: Function 'foo' has empty control result name + +library { + function { + signature { + name: "foo" + control_output: "output" + } + node_def { + name: "y" + op: "NoOp" + attr { + key: "T" + value { + placeholder: "T" + } + } + } + control_ret { + key: "output" + value: "" + } + } +} diff --git a/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_input.pbtxt b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_input.pbtxt new file mode 100644 index 00000000000000..5b1c3cff4f85ab --- /dev/null +++ b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_input.pbtxt @@ -0,0 +1,22 @@ +# RUN: not tfg-translate -graphdef-to-mlir %s 2>&1 | FileCheck %s + +# CHECK: Node 'y' has an empty input + +library { + function { + signature { + name: "foo" + } + node_def { + name: "y" + input: "" + op: "Identity" + attr { + key: "T" + value { + placeholder: "T" + } + } + } + } +} diff --git a/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_result.pbtxt b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_result.pbtxt new file mode 100644 index 00000000000000..f4fc5263ebf790 --- /dev/null +++ b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_result.pbtxt @@ -0,0 +1,29 @@ +# RUN: not tfg-translate -graphdef-to-mlir %s 2>&1 | FileCheck %s + +# CHECK: Function 'foo' has empty result name + +library { + function { + signature { + name: "foo" + output_arg { + name: "output" + type: DT_INT32 + } + } + node_def { + name: "y" + op: "NoOp" + attr { + key: 
"T" + value { + placeholder: "T" + } + } + } + ret { + key: "output" + value: "" + } + } +} diff --git a/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_function_attr_name.pbtxt b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_function_attr_name.pbtxt new file mode 100644 index 00000000000000..7a0f18f6732027 --- /dev/null +++ b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_function_attr_name.pbtxt @@ -0,0 +1,52 @@ +# RUN: not tfg-translate -graphdef-to-mlir %s 2>&1 | FileCheck %s + +# CHECK: Invalid function attribute name + +library { + function { + signature { + name: "foo" + input_arg { + name: "a" + } + output_arg { + name: "d" + } + } + node_def { + op: "Const" + attr { + key: "_b" + value { + placeholder: "T" + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + } + } + } + } + ret { + key: "d" + value: "a" + } + attr { + key: "" + value { + s: "a" + } + } + } +} diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 96234b1c151a6c..4ce88865fd4ed5 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -26,6 +26,10 @@ load( "tf_fingerprint_deps", "tf_kernel_tests_linkstatic", ) +load( + "//third_party/mkl:build_defs.bzl", + "mkl_deps", +) # buildifier: disable=same-origin-load load("//tensorflow:tensorflow.bzl", "cc_header_only_library") @@ -57,10 +61,6 @@ load( "//tensorflow/core/platform:build_config_root.bzl", "tf_cuda_tests_tags", ) -load( - "//third_party/mkl:build_defs.bzl", - "mkl_deps", -) load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load( "@local_config_rocm//rocm:build_defs.bzl", @@ -443,6 +443,7 @@ tf_cc_test( "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/platform:status_matchers", "@com_google_absl//absl/base:core_headers", ], ) @@ -4351,6 +4352,7 @@ tf_kernel_library( deps = [ ":fill_functor", ":gpu_prim_hdrs", + ":sparse_utils", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", @@ -4557,6 +4559,7 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core/framework:bounds_check", + "//tensorflow/core/util:overflow", "//third_party/eigen3", ], ) @@ -4865,6 +4868,7 @@ cc_library( SPARSE_DEPS = [ "//tensorflow/core:framework", "//tensorflow/core:lib", + ":sparse_utils", ] tf_kernel_library( @@ -6320,6 +6324,7 @@ filegroup( "sparse_reorder_op.h", "sparse_slice_op.h", "sparse_tensor_dense_matmul_op.h", + "sparse_utils.h", "string_util.h", "string_to_hash_bucket_op.h", "string_to_hash_bucket_fast_op.h", @@ -6554,6 +6559,7 @@ filegroup( "random_op_cpu.h", "random_ops_util.h", "random_poisson_op.cc", + "sparse_utils.cc", "random_shuffle_op.cc", "reduce_join_op.cc", "reduction_ops_all.cc", diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc index 0429d50cdec23a..bc536d187512af 100644 --- a/tensorflow/core/kernels/avgpooling_op.cc +++ b/tensorflow/core/kernels/avgpooling_op.cc @@ -35,6 +35,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/overflow.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" @@ -77,10 +78,10 @@ class AvgPoolingOp : public UnaryOp { OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1, errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); - for (int i = 0; i < ksize_.size(); ++i) { - OP_REQUIRES(context, ksize_[i] != 0, - errors::InvalidArgument("ksize cannot be zero")); + OP_REQUIRES(context, ksize_[i] > 0, + errors::InvalidArgument( + "ksize must be a postive int32 value, got:", ksize_[i])); } } @@ -142,6 +143,11 @@ class AvgPoolingOp : public UnaryOp { OP_REQUIRES(context, ksize_.size() == 4, errors::InvalidArgument("Sliding window ksize field must " "specify 4 dimensions")); + for (int i = 0; i < ksize_.size(); ++i) { + OP_REQUIRES(context, ksize_[i] > 0, + errors::InvalidArgument( + "ksize must be a postive int32 value, got:", ksize_[i])); + } OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_)); OP_REQUIRES(context, stride_.size() == 4, errors::InvalidArgument("Sliding window stride field must " @@ -298,7 +304,7 @@ class AvgPoolingGradOp : public OpKernel { TensorShape output_shape; auto shape_vec = tensor_in_shape.vec(); for (int64_t i = 0; i < tensor_in_shape.NumElements(); ++i) { - output_shape.AddDim(shape_vec(i)); + OP_REQUIRES_OK(context, output_shape.AddDimWithStatus(shape_vec(i))); } const int64_t in_rows = output_shape.dim_size(1); const int64_t in_cols = output_shape.dim_size(2); @@ -457,7 +463,7 @@ class AvgPoolingGradOp : public OpKernel { TensorShape output_shape; auto shape_vec = tensor_in_shape.vec(); for (int64_t i = 0; i < tensor_in_shape.NumElements(); ++i) { - output_shape.AddDim(shape_vec(i)); + OP_REQUIRES_OK(context, output_shape.AddDimWithStatus(shape_vec(i))); } if (output_shape.num_elements() == 0) { @@ -543,7 +549,7 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel { TensorShape output_shape; auto shape_vec = tensor_in_shape.vec(); for (int64_t i = 0; i < tensor_in_shape.NumElements(); ++i) { - output_shape.AddDim(shape_vec(i)); + OP_REQUIRES_OK(context, output_shape.AddDimWithStatus(shape_vec(i))); } if (output_shape.num_elements() == 0) { Tensor* output = nullptr; diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc index 0bb0a43b7d0e9f..dce6c221a1e61a 100644 --- a/tensorflow/core/kernels/batch_kernels.cc +++ b/tensorflow/core/kernels/batch_kernels.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_util.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h" @@ -654,6 +655,12 @@ class UnbatchResource : public ResourceBase { batch_index_t.shape().dim_size(1), "."); } + if (!TensorShapeUtils::IsScalar(context->input(2).shape())) { + return errors::InvalidArgument( + "Input id should be scalar; " + "Got: ", + context->input(2).DebugString(), "."); + } const int64_t batch_key = context->input(2).scalar()(); const bool nonempty_input = batch_index_t.dim_size(0) > 0; @@ -885,8 +892,13 @@ class UnbatchGradResource : public ResourceBase { const Tensor& data_t = context->input(0); const Tensor& batch_index_t = context->input(1); const Tensor& grad_t = context->input(2); + const Tensor& batch_key_t = context->input(3); mutex_lock ml(mu_); + if (batch_key_t.NumElements() != 1) { + return errors::InvalidArgument("Expected `id` to be scalar. Received ", + batch_key_t.DebugString()); + } const int64_t batch_key = context->input(3).scalar()(); // Mark our tensor as available. @@ -902,6 +914,11 @@ class UnbatchGradResource : public ResourceBase { "batch_index is empty while the tensor isn't."); } std::unordered_set missing_tensors; + if (batch_index_t.NumElements() != batch_index_t.dim_size(0) * 3) { + return errors::InvalidArgument( + "batch_index should contain ", batch_index_t.dim_size(0) * 3, + " elements. Received ", batch_index_t.NumElements()); + } const auto batch_index = batch_index_t.shaped({batch_index_t.dim_size(0), 3}); for (int i = 0; i < batch_index_t.dim_size(0); ++i) { diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc index aad11a45a09b6c..c8fb81f9591546 100644 --- a/tensorflow/core/kernels/bincount_op.cc +++ b/tensorflow/core/kernels/bincount_op.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/bincount_op.h" #include "tensorflow/core/kernels/fill_functor.h" +#include "tensorflow/core/kernels/sparse_utils.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/determinism.h" @@ -279,6 +280,14 @@ class DenseBincountOp : public OpKernel { OP_REQUIRES(ctx, size_t.dims() == 0, errors::InvalidArgument("Shape must be rank 0 but is rank ", size_t.dims())); + OP_REQUIRES(ctx, + weights.shape() == data.shape() || weights.NumElements() == 0, + errors::InvalidArgument( + "`weights` must be the same shape as `arr` or a length-0 " + "`Tensor`, in which case it acts as all weights equal to " + "1. 
Received ", + weights.shape().DebugString())); + Tidx size = size_t.scalar()(); OP_REQUIRES( ctx, size >= 0, @@ -369,7 +378,8 @@ class SparseBincountOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& indices = ctx->input(0); - const auto values = ctx->input(1).flat(); + const Tensor& values = ctx->input(1); + const auto values_flat = values.flat(); const Tensor& dense_shape = ctx->input(2); const Tensor& size_t = ctx->input(3); const auto weights = ctx->input(4).flat(); @@ -382,6 +392,9 @@ class SparseBincountOp : public OpKernel { OP_REQUIRES( ctx, size >= 0, errors::InvalidArgument("size (", size, ") must be non-negative")); + OP_REQUIRES_OK(ctx, sparse_utils::ValidateSparseTensor( + indices, values, dense_shape, + sparse_utils::IndexValidation::kUnordered)); bool is_1d = dense_shape.NumElements() == 1; @@ -394,11 +407,11 @@ class SparseBincountOp : public OpKernel { if (binary_output_) { OP_REQUIRES_OK(ctx, functor::BincountFunctor::Compute( - ctx, values, weights, out, size)); + ctx, values_flat, weights, out, size)); } else { OP_REQUIRES_OK( ctx, functor::BincountFunctor::Compute( - ctx, values, weights, out, size)); + ctx, values_flat, weights, out, size)); } } else { const auto shape = dense_shape.flat(); @@ -410,7 +423,7 @@ class SparseBincountOp : public OpKernel { const auto indices_mat = indices.matrix(); for (int64_t i = 0; i < indices_mat.dimension(0); ++i) { const int64_t batch = indices_mat(i, 0); - const Tidx bin = values(i); + const Tidx bin = values_flat(i); OP_REQUIRES( ctx, batch < out.dimension(0), errors::InvalidArgument("Index out of bound. `batch` (", batch, @@ -480,6 +493,9 @@ class RaggedBincountOp : public OpKernel { int num_values = values.size(); int batch_idx = 0; + OP_REQUIRES(ctx, splits.size() > 0, + errors::InvalidArgument("Splits must be non-empty")); + OP_REQUIRES(ctx, splits(0) == 0, errors::InvalidArgument("Splits must start with 0, not with ", splits(0))); diff --git a/tensorflow/core/kernels/candidate_sampler_ops.cc b/tensorflow/core/kernels/candidate_sampler_ops.cc index 872e805873f4ec..94eb7f2738eb53 100644 --- a/tensorflow/core/kernels/candidate_sampler_ops.cc +++ b/tensorflow/core/kernels/candidate_sampler_ops.cc @@ -73,6 +73,14 @@ class BaseCandidateSamplerOp : public OpKernel { gtl::ArraySlice true_candidate( true_classes.matrix().data(), batch_size * num_true_); + + for (const auto& candidate : true_candidate) { + OP_REQUIRES(context, candidate >= 0 && candidate < sampler_->range(), + errors::InvalidArgument("`true_candidate` out of range [", 0, + ", ", sampler_->range(), + "), received ", candidate)); + } + gtl::MutableArraySlice sampled_candidate( out_sampled_candidates->vec().data(), num_sampled_); gtl::MutableArraySlice true_expected_count( diff --git a/tensorflow/core/kernels/collective_ops.cc b/tensorflow/core/kernels/collective_ops.cc index 11a289004cfe95..b0c6fa16f082b6 100644 --- a/tensorflow/core/kernels/collective_ops.cc +++ b/tensorflow/core/kernels/collective_ops.cc @@ -176,6 +176,10 @@ class CollectiveGatherOpKernel : public CollectiveOpV1Kernel { void ComputeAsyncImpl(OpKernelContext* c, CollectiveExecutor* col_exec, DoneCallback done) override { auto output_shape = c->input(0).shape(); + OP_REQUIRES_ASYNC(c, output_shape.dims() > 0, + errors::InvalidArgument("input should have rank > 0, ", + "recieved ", output_shape.dims()), + done); output_shape.set_dim( 0, output_shape.dim_size(0) * col_params_->group.group_size); col_params_->instance.shape = output_shape; diff --git 
a/tensorflow/core/kernels/composite_tensor_ops.cc b/tensorflow/core/kernels/composite_tensor_ops.cc index f41b02991bba43..4e689d27d5acba 100644 --- a/tensorflow/core/kernels/composite_tensor_ops.cc +++ b/tensorflow/core/kernels/composite_tensor_ops.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_encode_decode.h" #include "tensorflow/core/kernels/composite_tensor_variant.h" @@ -66,7 +67,16 @@ class CompositeTensorVariantToComponents : public OpKernel { void Compute(OpKernelContext* context) override { Tensor encoded_t = context->input(0); + OP_REQUIRES( + context, encoded_t.flat().size() > 0, + errors::InvalidArgument("Input `encoded` must not be an empty variant " + "tensor, but got ", + encoded_t.DebugString())); auto* encoded = encoded_t.flat()(0).get(); + OP_REQUIRES(context, encoded != nullptr, + errors::InvalidArgument("The input `encoded` is not a valid " + "CompositeTensorVariant tensor, got ", + encoded_t.DebugString())); // Check that the encoded TypeSpec is compatible with the expected TypeSpec. // For now, we just check that the class matches. diff --git a/tensorflow/core/kernels/conv_grad_input_ops.h b/tensorflow/core/kernels/conv_grad_input_ops.h index 88df14d395c73d..6fdd72e0014b07 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.h +++ b/tensorflow/core/kernels/conv_grad_input_ops.h @@ -37,6 +37,7 @@ limitations under the License. #include "tensorflow/core/kernels/conv_2d.h" #include "tensorflow/core/kernels/conv_grad_ops.h" #include "tensorflow/core/kernels/conv_grad_shape_utils.h" +#include "tensorflow/core/kernels/fill_functor.h" #ifdef TENSORFLOW_USE_LIBXSMM_CONVOLUTIONS #include "tensorflow/core/kernels/xsmm_conv2d.h" #endif @@ -421,6 +422,11 @@ class Conv2DBackpropInputOp : public OpKernel { const Tensor& filter = context->input(1); const Tensor& out_backprop = context->input(2); + OP_REQUIRES( + context, out_backprop.dims() == 4, + errors::InvalidArgument("input_sizes must be 4-dimensional, got: ", + out_backprop.dims())); + TensorShape input_shape; OP_REQUIRES_OK(context, Conv2DBackpropComputeInputShape(input_sizes, filter.shape(), @@ -436,6 +442,15 @@ class Conv2DBackpropInputOp : public OpKernel { return; } + // If shapes are valid but `out_backprop` is empty, in_backprop should be + // set to all zeros. Otherwise, cudnn/dnnl fail with an empty input. + if (out_backprop.NumElements() == 0) { + functor::SetZeroFunctor set_zero; + set_zero(context->eigen_device(), + in_backprop->template flat()); + return; + } + // For now we take the stride from the second and third dimensions only (we // do not support striding on the batch or depth dimension). const int stride_rows = GetTensorDim(strides_, data_format_, 'H'); @@ -517,6 +532,10 @@ class Conv2DCustomBackpropInputOp : public OpKernel { const Tensor& input_sizes = context->input(0); const Tensor& filter = context->input(1); const Tensor& out_backprop = context->input(2); + OP_REQUIRES( + context, out_backprop.dims() == 4, + errors::InvalidArgument("input_sizes must be 4-dimensional, got: ", + out_backprop.dims())); TensorShape input_shape; OP_REQUIRES_OK(context, @@ -554,6 +573,15 @@ class Conv2DCustomBackpropInputOp : public OpKernel { return; } + // If shapes are valid but `out_backprop` is empty, in_backprop should be + // set to all zeros. 
Otherwise, cudnn/dnnl fail with an empty input. + if (out_backprop.NumElements() == 0) { + functor::SetZeroFunctor set_zero; + set_zero(context->eigen_device(), + in_backprop->template flat()); + return; + } + // TODO(ezhulenev): Remove custom kernel and move XSMM support to // LaunchConv2DBackpropInputOp functor. #if defined TENSORFLOW_USE_LIBXSMM_CONVOLUTIONS && \ diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index 9364ee92f5c401..09d3365564d8ad 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -741,6 +741,10 @@ class Conv3DBackpropFilterOp : public OpKernel { TensorShape filter_shape; if (takes_shape_) { const Tensor& filter_sizes = context->input(1); + OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_sizes.shape()), + errors::InvalidArgument( + "filter_sizes shape must be rank 1 but is rank ", + filter_sizes.shape().dims())); OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( filter_sizes.vec(), &filter_shape)); } else { @@ -875,6 +879,10 @@ class Conv3DCustomBackpropFilterOp : public OpKernel { TensorShape filter_shape; if (takes_shape_) { const Tensor& filter_sizes = context->input(1); + OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_sizes.shape()), + errors::InvalidArgument( + "filter_sizes shape must be rank 1 but is rank ", + filter_sizes.shape().dims())); OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( filter_sizes.vec(), &filter_shape)); } else { @@ -1638,6 +1646,10 @@ class Conv3DBackpropFilterOp : public OpKernel { TensorShape filter_shape; if (takes_shape_) { const Tensor& filter_sizes = context->input(1); + OP_REQUIRES(context, TensorShapeUtils::IsVector(filter_sizes.shape()), + errors::InvalidArgument( + "filter_sizes shape must be rank 1 but is rank ", + filter_sizes.shape().dims())); OP_REQUIRES_OK(context, tensor::MakeShape(filter_sizes, &filter_shape)); } else { filter_shape = context->input(1).shape(); diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index 67418151a1cf2d..44caed29252057 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -43,6 +43,7 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/conv_2d.h" #include "tensorflow/core/kernels/deep_conv2d.h" +#include "tensorflow/core/kernels/fill_functor.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -700,6 +701,15 @@ class Conv2DOp : public BinaryOp { return; } + // If the input is empty, result can only be due to padding. + if (input.NumElements() == 0) { + // Zero-out output and return. 
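// functor::SetZeroFunctor<Device, T> (from fill_functor.h) assigns a zero
// constant to the flat view of a tensor on the op's device. Both this hunk and
// the Conv2DBackprop*Input hunks above use it so that an empty input (or an
// empty out_backprop) still produces a fully defined, all-zero result instead
// of reaching the cuDNN/oneDNN paths, which reject empty inputs.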
+ functor::SetZeroFunctor()(context->eigen_device(), + output->template flat()); + + return; + } + #ifdef TENSORFLOW_USE_LIBXSMM_CONVOLUTIONS if (params_.padding != EXPLICIT && LaunchXsmmConvOp::Run( diff --git a/tensorflow/core/kernels/conv_ops_fused_image_transform.cc b/tensorflow/core/kernels/conv_ops_fused_image_transform.cc index 2d8feb313a30f4..dc5fd97103cc38 100644 --- a/tensorflow/core/kernels/conv_ops_fused_image_transform.cc +++ b/tensorflow/core/kernels/conv_ops_fused_image_transform.cc @@ -667,8 +667,11 @@ class FusedResizeConv2DUsingGemmOp : public OpKernel { st.height_scale = 1.0f; st.width_scale = 1.0f; } - TensorShape resized_shape( - {input.dim_size(0), st.out_height, st.out_width, input.dim_size(3)}); + TensorShape resized_shape; + OP_REQUIRES_OK(context, TensorShape::BuildTensorShape( + {input.dim_size(0), st.out_height, st.out_width, + input.dim_size(3)}, + &resized_shape)); int paddings_index; int filter_index; if (DoResize) { diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc index 22d338b778a98e..a0efc1a6e041de 100644 --- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc @@ -623,7 +623,7 @@ class DepthwiseConv2dNativeBackpropInputOp : public OpKernel { OP_REQUIRES(context, in_sizes_data[i] >= 0, errors::InvalidArgument("Dimension ", i, " of input_sizes must be >= 0")); - input_shape.AddDim(in_sizes_data[i]); + OP_REQUIRES_OK(context, input_shape.AddDimWithStatus(in_sizes_data[i])); } const TensorShape& filter_shape = filter.shape(); EXTRACT_AND_VERIFY_DIMENSIONS("DepthwiseConv2DBackpropInput"); @@ -1120,7 +1120,8 @@ class DepthwiseConv2dNativeBackpropFilterOp : public OpKernel { OP_REQUIRES(context, filter_sizes_data[i] >= 0, errors::InvalidArgument("Dimension ", i, " of filter_sizes must be >= 0")); - filter_shape.AddDim(filter_sizes_data[i]); + OP_REQUIRES_OK(context, + filter_shape.AddDimWithStatus(filter_sizes_data[i])); } const TensorShape& input_shape = input.shape(); diff --git a/tensorflow/core/kernels/edit_distance_op.cc b/tensorflow/core/kernels/edit_distance_op.cc index 3ff290e92b6103..3ed0f012b83ceb 100644 --- a/tensorflow/core/kernels/edit_distance_op.cc +++ b/tensorflow/core/kernels/edit_distance_op.cc @@ -203,9 +203,9 @@ class EditDistanceOp : public OpKernel { auto loc = std::inner_product(g_truth.begin(), g_truth.end(), output_strides.begin(), int64_t{0}); OP_REQUIRES( - ctx, loc < output_elements, + ctx, 0 <= loc && loc < output_elements, errors::Internal("Got an inner product ", loc, - " which would require in writing to outside of " + " which would require writing to outside of " "the buffer for the output tensor (max elements ", output_elements, ")")); output_t(loc) = @@ -218,9 +218,9 @@ class EditDistanceOp : public OpKernel { auto loc = std::inner_product(g_hypothesis.begin(), g_hypothesis.end(), output_strides.begin(), int64_t{0}); OP_REQUIRES( - ctx, loc < output_elements, + ctx, 0 <= loc && loc < output_elements, errors::Internal("Got an inner product ", loc, - " which would require in writing to outside of " + " which would require writing to outside of " "the buffer for the output tensor (max elements ", output_elements, ")")); output_t(loc) = hypothesis_seq.size(); @@ -232,9 +232,9 @@ class EditDistanceOp : public OpKernel { auto loc = std::inner_product(g_truth.begin(), g_truth.end(), output_strides.begin(), int64_t{0}); OP_REQUIRES( - ctx, loc < output_elements, + ctx, 0 <= loc && loc < output_elements, 
errors::Internal("Got an inner product ", loc, - " which would require in writing to outside of " + " which would require writing to outside of " "the buffer for the output tensor (max elements ", output_elements, ")")); output_t(loc) = (normalize_) ? 1.0 : truth_seq.size(); @@ -248,9 +248,9 @@ class EditDistanceOp : public OpKernel { auto loc = std::inner_product(g_hypothesis.begin(), g_hypothesis.end(), output_strides.begin(), int64_t{0}); OP_REQUIRES( - ctx, loc < output_elements, + ctx, 0 <= loc && loc < output_elements, errors::Internal("Got an inner product ", loc, - " which would require in writing to outside of the " + " which would require writing to outside of the " "buffer for the output tensor (max elements ", output_elements, ")")); output_t(loc) = hypothesis_seq.size(); @@ -266,9 +266,9 @@ class EditDistanceOp : public OpKernel { auto loc = std::inner_product(g_truth.begin(), g_truth.end(), output_strides.begin(), int64_t{0}); OP_REQUIRES( - ctx, loc < output_elements, + ctx, 0 <= loc && loc < output_elements, errors::Internal("Got an inner product ", loc, - " which would require in writing to outside of the " + " which would require writing to outside of the " "buffer for the output tensor (max elements ", output_elements, ")")); output_t(loc) = (normalize_) ? 1.0 : truth_seq.size(); diff --git a/tensorflow/core/kernels/fake_quant_ops.cc b/tensorflow/core/kernels/fake_quant_ops.cc index aa59213c67d81a..682459866e9885 100644 --- a/tensorflow/core/kernels/fake_quant_ops.cc +++ b/tensorflow/core/kernels/fake_quant_ops.cc @@ -24,6 +24,7 @@ limitations under the License. // Above is the related header but clang tidy doesn't recognize it. #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/monitoring/gauge.h" #include "tensorflow/core/platform/protobuf.h" @@ -205,6 +206,13 @@ class FakeQuantWithMinMaxVarsOp : public OpKernel { const Tensor& min = context->input(1); const Tensor& max = context->input(2); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(min.shape()), + InvalidArgument("`min` must be rank 0 but is rank ", min.dims())); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(max.shape()), + InvalidArgument("`max` must be rank 0 but is rank ", max.dims())); + Tensor* output; OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); @@ -253,6 +261,12 @@ class FakeQuantWithMinMaxVarsGradientOp : public OpKernel { InvalidArgument("gradient and input must be the same size")); const Tensor& min = context->input(2); const Tensor& max = context->input(3); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(min.shape()), + InvalidArgument("`min` must be rank 0 but is rank ", min.dims())); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(max.shape()), + InvalidArgument("`max` must be rank 0 but is rank ", max.dims())); Tensor* grad_wrt_input; OP_REQUIRES_OK(context, @@ -342,10 +356,17 @@ class FakeQuantWithMinMaxVarsPerChannelOp : public OpKernel { const Tensor& input = context->input(0); const int depth = input.dim_size(input.dims() - 1); // last dimension size. 
const Tensor& min = context->input(1); + const Tensor& max = context->input(2); + + OP_REQUIRES( + context, TensorShapeUtils::IsVector(min.shape()), + InvalidArgument("`min` must be rank 1 but is rank ", min.dims())); OP_REQUIRES(context, min.dim_size(0) == depth, InvalidArgument("min has incorrect size, expected ", depth, " was ", min.dim_size(0))); - const Tensor& max = context->input(2); + OP_REQUIRES( + context, TensorShapeUtils::IsVector(max.shape()), + InvalidArgument("`max` must be rank 1 but is rank ", max.dims())); OP_REQUIRES(context, max.dim_size(0) == depth, InvalidArgument("max has incorrect size, expected ", depth, " was ", max.dim_size(0))); @@ -399,10 +420,16 @@ class FakeQuantWithMinMaxVarsPerChannelGradientOp : public OpKernel { InvalidArgument("gradient and input must be the same size")); const int depth = input.dim_size(input.dims() - 1); // last dimension size. const Tensor& min = context->input(2); + OP_REQUIRES( + context, TensorShapeUtils::IsVector(min.shape()), + InvalidArgument("`min` must be rank 1 but is rank ", min.dims())); OP_REQUIRES(context, min.dim_size(0) == depth, InvalidArgument("min has incorrect size, expected ", depth, " was ", min.dim_size(0))); const Tensor& max = context->input(3); + OP_REQUIRES( + context, TensorShapeUtils::IsVector(max.shape()), + InvalidArgument("`max` must be rank 1 but is rank ", max.dims())); OP_REQUIRES(context, max.dim_size(0) == depth, InvalidArgument("max has incorrect size, expected ", depth, " was ", max.dim_size(0))); diff --git a/tensorflow/core/kernels/fft_ops.cc b/tensorflow/core/kernels/fft_ops.cc index 14d0ebd983142f..3186863884b1ce 100644 --- a/tensorflow/core/kernels/fft_ops.cc +++ b/tensorflow/core/kernels/fft_ops.cc @@ -66,6 +66,10 @@ class FFTBase : public OpKernel { auto fft_length_as_vec = fft_length.vec(); for (int i = 0; i < fft_rank; ++i) { + OP_REQUIRES(ctx, fft_length_as_vec(i) >= 0, + errors::InvalidArgument( + "fft_length[", i, + "] must >= 0, but got: ", fft_length_as_vec(i))); fft_shape[i] = fft_length_as_vec(i); // Each input dimension must have length of at least fft_shape[i]. For // IRFFTs, the inner-most input dimension must have length of at least diff --git a/tensorflow/core/kernels/fractional_avg_pool_op.cc b/tensorflow/core/kernels/fractional_avg_pool_op.cc index b3e65aeaee22f8..3bb206866089a0 100644 --- a/tensorflow/core/kernels/fractional_avg_pool_op.cc +++ b/tensorflow/core/kernels/fractional_avg_pool_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ + #define EIGEN_USE_THREADS #include @@ -19,15 +20,15 @@ limitations under the License. 
#include #include -#include "tensorflow/core/kernels/fractional_pool_common.h" - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/kernels/fractional_pool_common.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/util/guarded_philox_random.h" +#include "tensorflow/core/util/overflow.h" namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -43,6 +44,12 @@ class FractionalAvgPoolOp : public OpKernel { OP_REQUIRES(context, pooling_ratio_.size() == 4, errors::InvalidArgument( "pooling_ratio field must specify 4 dimensions")); + for (std::size_t i = 0; i < pooling_ratio_.size(); ++i) { + OP_REQUIRES(context, pooling_ratio_[i] >= 1, + errors::InvalidArgument( + "pooling_ratio cannot be smaller than 1, got: ", + pooling_ratio_[i])); + } OP_REQUIRES( context, pooling_ratio_[0] == 1 || pooling_ratio_[3] == 1, errors::Unimplemented("Fractional average pooling is not yet " @@ -81,9 +88,11 @@ class FractionalAvgPoolOp : public OpKernel { for (int i = 0; i < tensor_in_and_out_dims; ++i) { input_size[i] = tensor_in.dim_size(i); OP_REQUIRES( - context, pooling_ratio_[i] <= input_size[i], - errors::InvalidArgument( - "Pooling ratio cannot be bigger than input tensor dim size.")); + context, input_size[i] >= pooling_ratio_[i], + errors::InvalidArgument("Pooling ratio is higher than input " + "dimension size for dimension ", + i, ". Input dim size: ", input_size[i], + " pooling ratio: ", pooling_ratio_[i])); } // Output size. for (int i = 0; i < tensor_in_and_out_dims; ++i) { @@ -241,7 +250,32 @@ class FractionalAvgPoolGradOp : public OpKernel { orig_input_tensor_shape.NumElements() == 4, errors::InvalidArgument("original input tensor shape must be" "1-dimensional and 4 elements")); + int64_t num_elements = 1; + for (int i = 0; i < orig_input_tensor_shape.dims(); i++) { + OP_REQUIRES(context, orig_input_tensor_shape.dim_size(i) > 0, + errors::InvalidArgument( + "orig_input_tensor_shape must be positive, got: ", + orig_input_tensor_shape.dim_size(i))); + num_elements = MultiplyWithoutOverflow( + num_elements, orig_input_tensor_shape.dim_size(i)); + OP_REQUIRES( + context, num_elements > 0, + errors::InvalidArgument( + "The total elements specified by orig_input_tensor_shape", + " is too large. Encountered overflow after multiplying ", + orig_input_tensor_shape.dim_size(i), ", result: ", num_elements)); + } + const Tensor& out_backprop = context->input(1); + OP_REQUIRES(context, out_backprop.dims() == 4, + errors::InvalidArgument("out_backprop must be 4-dimensional")); + for (int i = 0; i < out_backprop.dims(); i++) { + OP_REQUIRES(context, out_backprop.dim_size(i) > 0, + errors::InvalidArgument( + "out_backprop must be positive for all dimension, got:", + out_backprop.dim_size(i))); + } + const Tensor& row_seq_tensor = context->input(2); const Tensor& col_seq_tensor = context->input(3); diff --git a/tensorflow/core/kernels/fractional_max_pool_op.cc b/tensorflow/core/kernels/fractional_max_pool_op.cc index 0722c408fba9d4..ec08b5c5028727 100644 --- a/tensorflow/core/kernels/fractional_max_pool_op.cc +++ b/tensorflow/core/kernels/fractional_max_pool_op.cc @@ -19,12 +19,13 @@ limitations under the License. 
#include #include -#include "tensorflow/core/kernels/fractional_pool_common.h" - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" +#include "tensorflow/core/kernels/fractional_pool_common.h" #include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/util/guarded_philox_random.h" @@ -44,6 +45,12 @@ class FractionalMaxPoolOp : public OpKernel { OP_REQUIRES(context, pooling_ratio_.size() == 4, errors::InvalidArgument("pooling_ratio field must " "specify 4 dimensions")); + for (std::size_t i = 0; i < pooling_ratio_.size(); ++i) { + OP_REQUIRES(context, pooling_ratio_[i] >= 1, + errors::InvalidArgument( + "pooling_ratio cannot be smaller than 1, got: ", + pooling_ratio_[i])); + } OP_REQUIRES( context, pooling_ratio_[0] == 1 || pooling_ratio_[3] == 1, @@ -257,6 +264,18 @@ class FractionalMaxPoolGradOp : public OpKernel { OP_REQUIRES(context, tensor_out.NumElements() > 0, errors::InvalidArgument("orig_output must not be empty, got ", tensor_out.DebugString())); + OP_REQUIRES( + context, + height_seq_tensor.NumElements() * width_seq_tensor.NumElements() <= + tensor_in.NumElements(), + errors::InvalidArgument( + "Pooling region has more elements than the input tensor. " + "row_pooling_sequence: ", + height_seq_tensor.DebugString(), + "col_pooling_sequence: ", width_seq_tensor.DebugString(), + "orig_input: ", tensor_in.DebugString())); + + // std::vector input_size(tensor_in_and_out_dims); std::vector output_size(tensor_in_and_out_dims); for (int i = 0; i < tensor_in_and_out_dims; ++i) { @@ -352,7 +371,9 @@ class FractionalMaxPoolGradOp : public OpKernel { output_size[2] * output_size[1] * output_size[0]; for (int64_t i = 0; i < num_reshaped_cols; ++i) { for (int64_t j = 0; j < output_size[3]; ++j) { - DCHECK_EQ(tensor_out_dup_mat(j, i), tensor_out_mat(j, i)); + OP_REQUIRES(context, tensor_out_dup_mat(j, i) == tensor_out_mat(j, i), + errors::InvalidArgument( + "tensor_out_dup is not the same as tensor_out")); } } @@ -369,11 +390,12 @@ class FractionalMaxPoolGradOp : public OpKernel { for (int index = 0; index < num_total_outputs; ++index) { int input_backprop_index = out_arg_max_flat(index); - // According to maxpooling_op.cc, the performance impact below is small. 
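// The change below is the recurring hardening pattern in this patch: CHECK()
// aborts the whole process when its condition is false, so a corrupted argmax
// tensor becomes a denial of service, whereas OP_REQUIRES records an
// InvalidArgument status on the OpKernelContext and returns from Compute().
// Roughly (a sketch, not the exact macro expansion):
//
//   if (!(input_backprop_index >= 0 && input_backprop_index < num_total_inputs)) {
//     context->CtxFailure(errors::InvalidArgument("Invalid input backprop index"));
//     return;
//   }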
- CHECK(input_backprop_index >= 0 && - input_backprop_index < num_total_inputs) - << "Invalid input backprop index: " << input_backprop_index << ", " - << num_total_inputs; + OP_REQUIRES( + context, + input_backprop_index >= 0 && input_backprop_index < num_total_inputs, + errors::InvalidArgument( + "Invalid input backprop index: ", input_backprop_index, ", ", + num_total_inputs)); input_backprop_flat(input_backprop_index) += out_backprop_flat(index); } } diff --git a/tensorflow/core/kernels/histogram_op.cc b/tensorflow/core/kernels/histogram_op.cc index 8e1b53de6ee409..8ba9bccc9fbecd 100644 --- a/tensorflow/core/kernels/histogram_op.cc +++ b/tensorflow/core/kernels/histogram_op.cc @@ -50,6 +50,15 @@ struct HistogramFixedWidthFunctor { static_cast(nbins); const double nbins_minus_1 = static_cast(nbins - 1); + // We cannot handle NANs in the algorithm below (due to the case to int32) + const Eigen::Tensor nans_tensor = + values.isnan().template cast(); + const Eigen::Tensor reduced_tensor = nans_tensor.sum(); + const int num_nans = reduced_tensor(0); + if (num_nans > 0) { + return errors::InvalidArgument("Histogram values must not contain NaN"); + } + // The calculation is done by finding the slot of each value in `values`. // With [a, b]: // step = (b - a) / nbins @@ -98,12 +107,12 @@ class HistogramFixedWidthOp : public OpKernel { const auto nbins = nbins_tensor.scalar()(); OP_REQUIRES( - ctx, (value_range(0) < value_range(1)), + ctx, value_range(0) < value_range(1), errors::InvalidArgument("value_range should satisfy value_range[0] < " "value_range[1], but got '[", value_range(0), ", ", value_range(1), "]'")); OP_REQUIRES( - ctx, (nbins > 0), + ctx, nbins > 0, errors::InvalidArgument("nbins should be a positive number, but got '", nbins, "'")); diff --git a/tensorflow/core/kernels/image/draw_bounding_box_op.cc b/tensorflow/core/kernels/image/draw_bounding_box_op.cc index 0ce1f3fa8a8291..d1a5b59146fde1 100644 --- a/tensorflow/core/kernels/image/draw_bounding_box_op.cc +++ b/tensorflow/core/kernels/image/draw_bounding_box_op.cc @@ -119,7 +119,7 @@ class DrawBoundingBoxesOp : public OpKernel { for (int64_t b = 0; b < batch_size; ++b) { const int64_t num_boxes = boxes.dim_size(1); - const auto tboxes = boxes.tensor(); + const auto tboxes = boxes.tensor(); for (int64_t bb = 0; bb < num_boxes; ++bb) { int64_t color_index = bb % color_table.size(); const int64_t min_box_row = diff --git a/tensorflow/core/kernels/image/generate_box_proposals_op.cu.cc b/tensorflow/core/kernels/image/generate_box_proposals_op.cu.cc index a12cd3e6601fcd..80dc57377be1a5 100644 --- a/tensorflow/core/kernels/image/generate_box_proposals_op.cu.cc +++ b/tensorflow/core/kernels/image/generate_box_proposals_op.cu.cc @@ -312,6 +312,22 @@ class GenerateBoundingBoxProposals : public tensorflow::OpKernel { const auto bbox_deltas = context->input(1); const auto image_info = context->input(2); const auto anchors = context->input(3); + + OP_REQUIRES(context, scores.dims() == 4, + errors::InvalidArgument("`scores` must be rank 4 but is rank ", + scores.dims())); + OP_REQUIRES( + context, bbox_deltas.dims() == 4, + errors::InvalidArgument("`bbox_deltas` must be rank 4 but is rank ", + bbox_deltas.dims())); + OP_REQUIRES( + context, image_info.dims() == 2, + errors::InvalidArgument("`image_info` must be rank 2 but is rank ", + image_info.dims())); + OP_REQUIRES(context, anchors.dims() == 3, + errors::InvalidArgument("`anchors` must be rank 3 but is rank ", + anchors.dims())); + const auto num_images = scores.dim_size(0); const auto 
num_anchors = scores.dim_size(3); const auto height = scores.dim_size(1); diff --git a/tensorflow/core/kernels/image/image_ops.cc b/tensorflow/core/kernels/image/image_ops.cc index c2e769f146c761..113a9b2af9a822 100644 --- a/tensorflow/core/kernels/image/image_ops.cc +++ b/tensorflow/core/kernels/image/image_ops.cc @@ -96,11 +96,12 @@ void DoImageProjectiveTransformOp(OpKernelContext* ctx, } Tensor* output_t; + TensorShape output_shape; OP_REQUIRES_OK( - ctx, ctx->allocate_output(0, - TensorShape({images_t.dim_size(0), out_height, - out_width, images_t.dim_size(3)}), - &output_t)); + ctx, TensorShape::BuildTensorShape({images_t.dim_size(0), out_height, + out_width, images_t.dim_size(3)}, + &output_shape)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output_t)); auto output = output_t->tensor(); auto images = images_t.tensor(); auto transform = transform_t.matrix(); diff --git a/tensorflow/core/kernels/image/mirror_pad_op.cc b/tensorflow/core/kernels/image/mirror_pad_op.cc index 9b9ba452517118..b4bf3b3997513f 100644 --- a/tensorflow/core/kernels/image/mirror_pad_op.cc +++ b/tensorflow/core/kernels/image/mirror_pad_op.cc @@ -297,13 +297,21 @@ class MirrorPadGradOp : public OpKernel { TensorShape output_shape; typename TTypes::ConstMatrix paddings = in1.matrix(); for (int d = 0; d < dims; ++d) { - const Tpaddings before = paddings(d, 0); // Pad before existing elements. - const Tpaddings after = paddings(d, 1); // Pad after existing elements. + const int64_t before = paddings(d, 0); // Pad before existing elements. + const int64_t after = paddings(d, 1); // Pad after existing elements. OP_REQUIRES(context, before >= 0 && after >= 0, errors::InvalidArgument( "Paddings must be non-negative: ", before, ", ", after)); - const int64_t out_size = in0.dim_size(d) - (before + after); + const int64_t in_size = in0.dim_size(d); + const int64_t total_padding = before + after; + OP_REQUIRES( + context, total_padding < in_size && total_padding >= 0, + errors::InvalidArgument( + "Total paddings must be less than the input dimension size: ", + total_padding, " was not less than ", in_size)); + + const int64_t out_size = in_size - total_padding; if (offset_ == 0) { // SYMMETRIC mode. OP_REQUIRES(context, before <= out_size && after <= out_size, errors::InvalidArgument("paddings must be no greater " diff --git a/tensorflow/core/kernels/image/resize_nearest_neighbor_op.cc b/tensorflow/core/kernels/image/resize_nearest_neighbor_op.cc index a54b60f0099a0b..0d0e0cbdbe639e 100644 --- a/tensorflow/core/kernels/image/resize_nearest_neighbor_op.cc +++ b/tensorflow/core/kernels/image/resize_nearest_neighbor_op.cc @@ -257,11 +257,11 @@ class ResizeNearestNeighborOpGrad : public OpKernel { const int64_t out_width = sizes(1); Tensor* output = nullptr; - OP_REQUIRES_OK( - context, - context->allocate_output( - 0, TensorShape({batch_size, out_height, out_width, channels}), - &output)); + TensorShape shape; + OP_REQUIRES_OK(context, + TensorShape::BuildTensorShape( + {batch_size, out_height, out_width, channels}, &shape)); + OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output)); // Return if the output is empty. if (output->NumElements() == 0) return; diff --git a/tensorflow/core/kernels/linalg/linalg_ops_common.cc b/tensorflow/core/kernels/linalg/linalg_ops_common.cc index bb55f7de0011a9..676111f4bf14d6 100644 --- a/tensorflow/core/kernels/linalg/linalg_ops_common.cc +++ b/tensorflow/core/kernels/linalg/linalg_ops_common.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include #include #include "third_party/eigen3/Eigen/Core" @@ -22,7 +23,9 @@ limitations under the License. #include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -152,6 +155,10 @@ void LinearAlgebraOp::AnalyzeInputs( input_matrix_shapes->emplace_back( std::initializer_list({num_rows, num_cols})); inputs->emplace_back(&in); + OP_REQUIRES( + context, in.dtype() == DataTypeToEnum::v(), + errors::InvalidArgument("Invalid input dtype ", in.dtype(), " vs ", + DataTypeToEnum::v())); } // Have the derived class validate that the inputs are as expected. ValidateInputMatrixShapes(context, *input_matrix_shapes); @@ -212,6 +219,11 @@ void LinearAlgebraOp::PrepareOutputs( OP_REQUIRES_OK(context, context->allocate_output( output_idx, output_tensor_shape, &out)); } + OP_REQUIRES( + context, out->dtype() == DataTypeToEnum::v(), + errors::InvalidArgument("Invalid output dtype ", out->dtype(), " vs ", + DataTypeToEnum::v())); + outputs->emplace_back(out); } } diff --git a/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc index a3532f765a414b..6168baac069d68 100644 --- a/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc @@ -395,6 +395,12 @@ class SvdOpGpu : public AsyncOpKernel { OP_REQUIRES_OK_ASYNC(context, context->allocate_output(2, shapeV, &outputV), done); + // If there are zero batches, we are done. + if (shapeRaw.num_elements() == 0) { + done(); + return; + } + if (n == 0 || m == 0) { if (n == m || !compute_uv_ || !full_matrices_) { // S, U, and V are all empty. Nothing to do. diff --git a/tensorflow/core/kernels/list_kernels.cc b/tensorflow/core/kernels/list_kernels.cc index 5f7943a9bad044..64055adf1e2ee3 100644 --- a/tensorflow/core/kernels/list_kernels.cc +++ b/tensorflow/core/kernels/list_kernels.cc @@ -21,19 +21,21 @@ limitations under the License. 
#include "tensorflow/core/kernels/list_kernels.h" +#include +#include #include +#include +#include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_op_registry.h" -#include "tensorflow/core/kernels/concat_lib.h" -#include "tensorflow/core/lib/core/coding.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/util/util.h" +#include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -49,6 +51,9 @@ Status TensorShapeFromTensor(const Tensor& t, PartialTensorShape* out) { return errors::InvalidArgument( "The only valid scalar shape tensor is the fully unknown shape " "specified as -1."); + } else if (t.shape().dims() != 1) { + return errors::InvalidArgument("Shape must be at most rank 1 but is rank ", + t.shape().dims()); } if (t.dtype() == DT_INT32) { return PartialTensorShape::MakePartialShape(t.vec().data(), @@ -319,6 +324,11 @@ class TensorListReserve : public OpKernel { void Compute(OpKernelContext* c) override { PartialTensorShape element_shape; OP_REQUIRES_OK(c, TensorShapeFromTensor(c->input(0), &element_shape)); + OP_REQUIRES( + c, TensorShapeUtils::IsScalar(c->input(1).shape()), + errors::InvalidArgument( + "The num_elements to reserve must be a tensor size 1, but got ", + c->input(1).shape())); int32_t num_elements = c->input(1).scalar()(); OP_REQUIRES(c, num_elements >= 0, errors::InvalidArgument("The num_elements to reserve must be a " @@ -365,6 +375,8 @@ class TensorListResize : public OpKernel { void Compute(OpKernelContext* c) override { const TensorList* input_list = nullptr; OP_REQUIRES_OK(c, GetInputList(c, 0, &input_list)); + OP_REQUIRES(c, TensorShapeUtils::IsScalar(c->input(1).shape()), + errors::InvalidArgument("size must be a scalar")); int32_t size = c->input(1).scalar()(); OP_REQUIRES( c, size >= 0, diff --git a/tensorflow/core/kernels/list_kernels.h b/tensorflow/core/kernels/list_kernels.h index cdcc4c8c34b0d7..bab17b4934c220 100644 --- a/tensorflow/core/kernels/list_kernels.h +++ b/tensorflow/core/kernels/list_kernels.h @@ -393,8 +393,11 @@ class TensorListConcat : public OpKernel { void Compute(OpKernelContext* c) override { PartialTensorShape element_shape_except_first_dim; if (!element_shape_.unknown_rank()) { - element_shape_except_first_dim = PartialTensorShape( - gtl::ArraySlice(element_shape_.dim_sizes()).subspan(1)); + auto dim_sizes = element_shape_.dim_sizes(); + OP_REQUIRES(c, !dim_sizes.empty(), + errors::InvalidArgument("element_shape must not be empty")); + element_shape_except_first_dim = + PartialTensorShape(gtl::ArraySlice(dim_sizes).subspan(1)); } // Check that the input Variant tensor is indeed a TensorList and has the // correct element type. 
@@ -768,6 +771,11 @@ class TensorListFromTensor : public OpKernel { attr.set_on_host(true); OP_REQUIRES_OK(c, c->allocate_output(0, {}, &output_tensor, attr)); PartialTensorShape element_shape; + OP_REQUIRES( + c, !TensorShapeUtils::IsMatrixOrHigher(c->input(1).shape()), + errors::InvalidArgument( + "TensorListFromTensor: element_shape must be at most rank 1 but ", + "has the shape of ", c->input(1).shape().DebugString())); OP_REQUIRES_OK(c, TensorShapeFromTensor(c->input(1), &element_shape)); TensorList output_list; const Tensor& t = c->input(0); @@ -894,10 +902,20 @@ class TensorListScatter : public OpKernel { OP_REQUIRES_OK(c, c->allocate_output(0, {}, &output_tensor, attr)); Tensor indices = c->input(1); PartialTensorShape element_shape; + OP_REQUIRES( + c, !TensorShapeUtils::IsMatrixOrHigher(c->input(2).shape()), + errors::InvalidArgument( + "TensorListScatter: element_shape must be at most rank 1 but has ", + "the shape of ", c->input(2).shape().DebugString())); OP_REQUIRES_OK(c, TensorShapeFromTensor(c->input(2), &element_shape)); // TensorListScatterV2 passes the num_elements input, TensorListScatter does // not. - int num_elements = c->num_inputs() >= 4 ? c->input(3).scalar()() : -1; + int num_elements = -1; + if (c->num_inputs() >= 4) { + OP_REQUIRES(c, TensorShapeUtils::IsScalar(c->input(3).shape()), + errors::InvalidArgument("num_elements must be a scalar")); + num_elements = c->input(3).scalar()(); + } OP_REQUIRES(c, num_elements >= -1, errors::InvalidArgument( "TensorListScatter expects num_elements >= -1, found: ", diff --git a/tensorflow/core/kernels/load_and_remap_matrix_op.cc b/tensorflow/core/kernels/load_and_remap_matrix_op.cc index 3fa753251c0f4f..4276e16059a9c6 100644 --- a/tensorflow/core/kernels/load_and_remap_matrix_op.cc +++ b/tensorflow/core/kernels/load_and_remap_matrix_op.cc @@ -74,6 +74,11 @@ class LoadAndRemapMatrixOp : public OpKernel { std::vector row_id_present; const Tensor* row_remapping_t; OP_REQUIRES_OK(context, context->input("row_remapping", &row_remapping_t)); + OP_REQUIRES( + context, row_remapping_t->dims() == 1, + errors::InvalidArgument("The `row_remapping` tensor must be 1-D, got " + "a tensor of shape ", + row_remapping_t->shape().DebugString())); const auto row_remapping = row_remapping_t->vec(); OP_REQUIRES(context, row_remapping.size() == num_rows_, errors::InvalidArgument(strings::StrCat( diff --git a/tensorflow/core/kernels/lrn_op.cc b/tensorflow/core/kernels/lrn_op.cc index 31aaf018329b52..0d6d24d3dce2df 100644 --- a/tensorflow/core/kernels/lrn_op.cc +++ b/tensorflow/core/kernels/lrn_op.cc @@ -668,7 +668,8 @@ class LRNGradOp : public OpKernel { in_image.dim_size(0) == batch && in_image.dim_size(1) == rows && in_image.dim_size(2) == cols && in_image.dim_size(3) == depth && out_image.dim_size(0) == batch && out_image.dim_size(1) == rows && - out_image.dim_size(2) == cols && out_image.dim_size(3) == depth, + out_image.dim_size(2) == cols && out_image.dim_size(3) == depth && + out_image.dims() == 4, errors::InvalidArgument( "input_grads, input_image, and out_image should have the same " "shape")); diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index 9edd5cf6a6d52b..4fb198690d5ad2 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -1268,6 +1268,13 @@ class MaxPoolingNoMaskOp : public OpKernel { ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height, params.out_width, params.depth); + // Degenerate pooling output should 
return an empty tensor. + if (out_shape.num_elements() == 0) { + Tensor* output = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); + return; + } + // Assuming qint8 <--> NCHW_VECT_C (int8x4) here. constexpr bool is_int8x4 = std::is_same::value; OP_REQUIRES(context, (is_int8x4 == (data_format_ == FORMAT_NCHW_VECT_C)), diff --git a/tensorflow/core/kernels/mfcc.cc b/tensorflow/core/kernels/mfcc.cc index 8c755e0df87546..cb4416f7bd3092 100644 --- a/tensorflow/core/kernels/mfcc.cc +++ b/tensorflow/core/kernels/mfcc.cc @@ -38,8 +38,10 @@ bool Mfcc::Initialize(int input_length, double input_sample_rate) { bool initialized = mel_filterbank_.Initialize( input_length, input_sample_rate, filterbank_channel_count_, lower_frequency_limit_, upper_frequency_limit_); - initialized &= - dct_.Initialize(filterbank_channel_count_, dct_coefficient_count_); + if (initialized) { + initialized = + dct_.Initialize(filterbank_channel_count_, dct_coefficient_count_); + } initialized_ = initialized; return initialized; } diff --git a/tensorflow/core/kernels/mfcc_mel_filterbank.cc b/tensorflow/core/kernels/mfcc_mel_filterbank.cc index 8eb2d9d8309f50..c5c2d29d37b99d 100644 --- a/tensorflow/core/kernels/mfcc_mel_filterbank.cc +++ b/tensorflow/core/kernels/mfcc_mel_filterbank.cc @@ -32,6 +32,8 @@ limitations under the License. #include +#include + #include "tensorflow/core/platform/logging.h" namespace tensorflow { @@ -74,7 +76,17 @@ bool MfccMelFilterbank::Initialize(int input_length, double input_sample_rate, // An extra center frequency is computed at the top to get the upper // limit on the high side of the final triangular filter. - center_frequencies_.resize(num_channels_ + 1); + std::size_t center_frequencies_size = std::size_t(num_channels_) + 1; + if (center_frequencies_size >= std::numeric_limits::max() || + center_frequencies_size > center_frequencies_.max_size()) { + LOG(ERROR) << "Number of filterbank channels must be less than " + << std::numeric_limits::max() + << " and less than or equal to " + << center_frequencies_.max_size(); + return false; + } + center_frequencies_.resize(center_frequencies_size); + const double mel_low = FreqToMel(lower_frequency_limit); const double mel_hi = FreqToMel(upper_frequency_limit); const double mel_span = mel_hi - mel_low; diff --git a/tensorflow/core/kernels/mfcc_mel_filterbank_test.cc b/tensorflow/core/kernels/mfcc_mel_filterbank_test.cc index 54f31e1699ef18..26b5afed135051 100644 --- a/tensorflow/core/kernels/mfcc_mel_filterbank_test.cc +++ b/tensorflow/core/kernels/mfcc_mel_filterbank_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/kernels/mfcc_mel_filterbank.h" +#include #include #include "tensorflow/core/platform/test.h" @@ -85,4 +86,37 @@ TEST(MfccMelFilterbankTest, IgnoresExistingContentOfOutputVector) { } } +TEST(MfccMelFilterbankTest, FailsWhenChannelsGreaterThanMaxIntValue) { + // Test for bug where vector throws a length_error when it suspects the size + // to be more than it's max_size. For now, we fail initialization when the + // number of requested channels is >= the maximum value int can take (since + // num_channels_ is an int). 
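// What the Initialize() guard added above protects against: num_channels_ + 1
// is computed in std::size_t and compared against both the maximum int (since
// num_channels_ is stored as an int) and std::vector<double>::max_size() before
// center_frequencies_.resize() runs, so an absurd channel count makes
// Initialize() return false instead of throwing std::length_error or
// attempting a huge allocation. MfccOp then surfaces this as an InvalidArgument
// that includes filterbank_channel_count_ (see the mfcc_op.cc hunk below).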
+ MfccMelFilterbank filterbank; + + const int kSampleCount = 513; + std::size_t num_channels = std::numeric_limits::max(); + bool initialized = filterbank.Initialize( + kSampleCount, 2 /* sample rate */, num_channels /* channels */, + 1.0 /* lower frequency limit */, 5.0 /* upper frequency limit */); + + EXPECT_FALSE(initialized); +} + +TEST(MfccMelFilterbankTest, FailsWhenChannelsGreaterThanMaxSize) { + // Test for bug where vector throws a length_error when it suspects the size + // to be more than it's max_size. For now, we fail initialization when the + // number of requested channels is > than std::vector::max_size(). + MfccMelFilterbank filterbank; + + const int kSampleCount = 513; + // Set num_channels to exceed the max_size a double vector can + // theoretically take. + std::size_t num_channels = std::vector().max_size() + 1; + bool initialized = filterbank.Initialize( + kSampleCount, 2 /* sample rate */, num_channels /* channels */, + 1.0 /* lower frequency limit */, 5.0 /* upper frequency limit */); + + EXPECT_FALSE(initialized); +} + } // namespace tensorflow diff --git a/tensorflow/core/kernels/mfcc_op.cc b/tensorflow/core/kernels/mfcc_op.cc index 358a420c1606ab..2c5f9560aaa31c 100644 --- a/tensorflow/core/kernels/mfcc_op.cc +++ b/tensorflow/core/kernels/mfcc_op.cc @@ -25,7 +25,7 @@ limitations under the License. namespace tensorflow { -// Create a speech fingerpring from spectrogram data. +// Create a speech fingerprint from spectrogram data. class MfccOp : public OpKernel { public: explicit MfccOp(OpKernelConstruction* context) : OpKernel(context) { @@ -60,10 +60,12 @@ class MfccOp : public OpKernel { mfcc.set_lower_frequency_limit(lower_frequency_limit_); mfcc.set_filterbank_channel_count(filterbank_channel_count_); mfcc.set_dct_coefficient_count(dct_coefficient_count_); - OP_REQUIRES(context, mfcc.Initialize(spectrogram_channels, sample_rate), - errors::InvalidArgument( - "Mfcc initialization failed for channel count ", - spectrogram_channels, " and sample rate ", sample_rate)); + OP_REQUIRES( + context, mfcc.Initialize(spectrogram_channels, sample_rate), + errors::InvalidArgument("Mfcc initialization failed for channel count ", + spectrogram_channels, ", sample rate ", + sample_rate, " and filterbank_channel_count ", + filterbank_channel_count_)); Tensor* output_tensor = nullptr; OP_REQUIRES_OK(context, diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD index 60534751550485..856086db1190de 100644 --- a/tensorflow/core/kernels/mlir_generated/BUILD +++ b/tensorflow/core/kernels/mlir_generated/BUILD @@ -244,6 +244,7 @@ tf_kernel_library( "gpu_op_div_no_nan.cc", "gpu_op_equal.cc", "gpu_op_floor_div.cc", + "gpu_op_floor_mod.cc", "gpu_op_greater.cc", "gpu_op_greater_equal.cc", "gpu_op_left_shift.cc", @@ -266,8 +267,6 @@ tf_kernel_library( "gpu_op_xlog1py.cc", "gpu_op_xlogy.cc", "gpu_op_zeta.cc", - ]) + if_mlir_generated_experimental_kernels_enabled([ - "gpu_op_floor_mod.cc", ]), copts = if_mlir_generated_experimental_kernels_enabled([ "-DMLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED", @@ -286,6 +285,7 @@ tf_kernel_library( ":gpu_div_no_nan_kernels", ":gpu_equal_kernels", ":gpu_floor_div_kernels", + ":gpu_floor_mod_kernels", ":gpu_greater_equal_kernels", ":gpu_greater_kernels", ":gpu_left_shift_kernels", @@ -309,7 +309,7 @@ tf_kernel_library( ":gpu_xlogy_kernels", ":gpu_zeta_kernels", "//third_party/eigen3", - ]) + if_mlir_generated_experimental_kernels_enabled([":gpu_floor_mod_kernels"]), + ]), ) tf_kernel_library( diff --git 
a/tensorflow/core/kernels/mlir_generated/gpu_binary_ops_test.cc b/tensorflow/core/kernels/mlir_generated/gpu_binary_ops_test.cc index 33556b462ea2c5..464310c4ed6417 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_binary_ops_test.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_binary_ops_test.cc @@ -61,7 +61,8 @@ GENERATE_DEFAULT_TESTS(Add, /*test_name=*/Complex128, std::complex, test::OpsTestConfig().ExpectStrictlyEqual()) // These kernels are JIT-compiled. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS(Add, /*test_name=*/Int8, int8_t, int8_t, baseline_add, test::OpsTestConfig().ExpectStrictlyEqual()) GENERATE_DEFAULT_TESTS(Add, /*test_name=*/Int16, int16_t, int16_t, baseline_add, @@ -101,7 +102,8 @@ GENERATE_DEFAULT_TESTS(AddV2, /*test_name=*/Complex128, std::complex, test::OpsTestConfig().ExpectStrictlyEqual()) // These kernels are JIT-compiled. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS(AddV2, /*test_name=*/Int8, int8_t, int8_t, baseline_add, test::OpsTestConfig().ExpectStrictlyEqual()) GENERATE_DEFAULT_TESTS(AddV2, /*test_name=*/Int16, int16_t, int16_t, @@ -348,7 +350,8 @@ GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( test::OpsTestConfig().ExpectStrictlyEqual()) // These kernels are JIT-compiled. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( Div, /*test_name=*/Int8, int8_t, int8_t, test::DefaultInput(), @@ -393,19 +396,16 @@ TEST_F(BinaryOpsTest, DivComplex128SpecialCases) { /// Test `tf.TruncatedDiv` // These kernels are JIT-compiled. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( TruncateDiv, /*test_name=*/Int8, int8_t, int8_t, test::DefaultInput(), test::DefaultInputNonZero(), baseline_div, test::OpsTestConfig().ExpectStrictlyEqual()) -#endif -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( TruncateDiv, /*test_name=*/Uint32, uint32_t, uint32_t, test::DefaultInput(), test::DefaultInputNonZero(), baseline_div, test::OpsTestConfig().ExpectStrictlyEqual()) -#endif -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( TruncateDiv, /*test_name=*/Uint64, uint64_t, uint64_t, test::DefaultInput(), test::DefaultInputNonZero(), @@ -495,7 +495,8 @@ TEST_F(BinaryOpsTest, EqualUint8_tSpecialCases) { } // These kernels are JIT-compiled. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS(Equal, /*test_name=*/UInt16, uint16_t, bool, baseline_equal, test::OpsTestConfig().ExpectStrictlyEqual()) @@ -577,7 +578,8 @@ GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( test::OpsTestConfig().ExpectStrictlyEqual()); /// Test the JIT-compiled kernels. 
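// The guard changes below pair with the BUILD change above: gpu_op_floor_mod.cc
// is now always listed in srcs, and instead the JIT registrations themselves
// (here and in the gpu_op_*.cc files that follow) are wrapped in
// #if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED), a macro the BUILD
// file defines only when experimental kernels are enabled. The tests add the
// same macro as a second condition to their existing #if guards, so each test
// is compiled out whenever the kernel registration it exercises is.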
-#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( FloorDiv, /*test_name=*/Int8, int8_t, int8_t, test::DefaultInput(), @@ -918,7 +920,8 @@ GENERATE_DEFAULT_TESTS(Maximum, /*test_name=*/UInt8, uint8_t, uint8_t, test::OpsTestConfig().ExpectStrictlyEqual()) /// Test the JIT-compiled kernels. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS(Maximum, /*test_name=*/Int8, int8_t, int8_t, baseline_maximum, test::OpsTestConfig().ExpectStrictlyEqual()) @@ -960,7 +963,8 @@ GENERATE_DEFAULT_TESTS(Minimum, /*test_name=*/UInt8, uint8_t, uint8_t, test::OpsTestConfig().ExpectStrictlyEqual()) /// Test the JIT-compiled kernels. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS(Minimum, /*test_name=*/Int8, int8_t, int8_t, baseline_minimum, test::OpsTestConfig().ExpectStrictlyEqual()) @@ -1139,7 +1143,8 @@ TEST_F(BinaryOpsTest, NotEqualUint8_tSpecialCases) { } // These kernels are JIT-compiled. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS(NotEqual, /*test_name=*/UInt16, uint16_t, bool, baseline_not_equal, test::OpsTestConfig().ExpectStrictlyEqual()) @@ -1288,7 +1293,8 @@ GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( test::OpsTestConfig().ExpectStrictlyEqual()) /// Test the JIT-compiled kernels. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES( Pow, /*test_name=*/Int8, int8_t, int8_t, PowInput(), PowInput(), baseline_pow, @@ -1426,7 +1432,8 @@ GENERATE_DEFAULT_TESTS(Sub, /*test_name=*/UInt64, uint64_t, uint64_t, test::OpsTestConfig().ExpectStrictlyEqual()) /// Test the JIT-compiled kernel. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TESTS(Sub, /*test_name=*/Int8, int8_t, int8_t, baseline_sub, test::OpsTestConfig().ExpectStrictlyEqual()) GENERATE_DEFAULT_TESTS(Sub, /*test_name=*/Int16, int16_t, int16_t, baseline_sub, diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_abs.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_abs.cc index bfa608b54f56bd..41bed393c11b0c 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_abs.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_abs.cc @@ -25,7 +25,9 @@ GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Abs, DT_DOUBLE); GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Abs, DT_INT64); // These kernels are JIT-compiled. 
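// Registration side of the same macro: with
// MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED undefined, the
// GENERATE_AND_REGISTER_*_JIT_GPU_KERNEL expansions below are dropped entirely,
// keeping the set of registered JIT kernels in sync with the guarded tests above.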
+#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Abs, DT_INT8); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Abs, DT_INT16); +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_div.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_div.cc index 52ba25449d72a1..b6db910b5fd1cb 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_div.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_div.cc @@ -29,9 +29,11 @@ GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_COMPLEX64); GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_COMPLEX128); // These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Div, DT_INT8); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Div, DT_UINT32); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Div, DT_UINT64); +#endif REGISTER_ALIASED_GPU_KERNEL(RealDiv, Div, DT_HALF, DT_HALF); REGISTER_ALIASED_GPU_KERNEL(RealDiv, Div, DT_FLOAT, DT_FLOAT); diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_floor_div.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_floor_div.cc index 4b090a11360f28..84a329dcc165f8 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_floor_div.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_floor_div.cc @@ -21,9 +21,11 @@ namespace tensorflow { // gpu_op_div.cc, because they alias the Div kernel. // These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(FloorDiv, DT_INT8); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(FloorDiv, DT_UINT32); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(FloorDiv, DT_UINT64); +#endif GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(FloorDiv, DT_INT16); GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(FloorDiv, DT_INT64); diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_floor_mod.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_floor_mod.cc index ca698e9f05c7ac..8f43f58ba807cb 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_floor_mod.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_floor_mod.cc @@ -18,6 +18,7 @@ limitations under the License. namespace tensorflow { // These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(FloorMod, DT_INT8); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(FloorMod, DT_INT16); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(FloorMod, DT_INT64); @@ -28,5 +29,6 @@ GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(FloorMod, DT_UINT64); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(FloorMod, DT_HALF); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(FloorMod, DT_FLOAT); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(FloorMod, DT_DOUBLE); +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_maximum.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_maximum.cc index 2eabd0e0a56b76..a2cbca4f372cb9 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_maximum.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_maximum.cc @@ -27,9 +27,11 @@ GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Maximum, DT_INT64); GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Maximum, DT_UINT8); // These kernels are JIT-compiled. 
+#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Maximum, DT_INT8); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Maximum, DT_UINT16); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Maximum, DT_UINT32); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Maximum, DT_UINT64); +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_minimum.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_minimum.cc index 4d8663dd4ce99b..5567ca65c383ff 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_minimum.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_minimum.cc @@ -27,9 +27,11 @@ GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Minimum, DT_INT64); GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Minimum, DT_UINT8); // These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Minimum, DT_INT8); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Minimum, DT_UINT16); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Minimum, DT_UINT32); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Minimum, DT_UINT64); +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_ones_like.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_ones_like.cc index 9121dcdacce934..c79c4e6de0a192 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_ones_like.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_ones_like.cc @@ -25,11 +25,13 @@ GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(OnesLike, DT_DOUBLE); GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(OnesLike, DT_INT64); // These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(OnesLike, DT_INT8); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(OnesLike, DT_INT16); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(OnesLike, DT_UINT8); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(OnesLike, DT_UINT16); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(OnesLike, DT_UINT32); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(OnesLike, DT_UINT64); +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_pow.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_pow.cc index 6cf77b92c49d9e..98c5ba62a7e88c 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_pow.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_pow.cc @@ -23,7 +23,9 @@ GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Pow, DT_DOUBLE); GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Pow, DT_INT64); // These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Pow, DT_INT8); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Pow, DT_INT16); +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_relu.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_relu.cc index 03bc8f626a0948..219ad3225169c8 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_relu.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_relu.cc @@ -23,6 +23,7 @@ GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Relu, DT_FLOAT); GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Relu, DT_DOUBLE); // These kernels are JIT-compiled. 
+#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Relu, DT_INT8); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Relu, DT_INT16); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Relu, DT_INT64); @@ -30,5 +31,6 @@ GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Relu, DT_UINT8); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Relu, DT_UINT16); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Relu, DT_UINT32); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Relu, DT_UINT64); +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_rint.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_rint.cc index be0e2d264b7bf0..b9d34e85a263dc 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_rint.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_rint.cc @@ -19,7 +19,9 @@ limitations under the License. namespace tensorflow { // This kernel is JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Rint, DT_HALF); +#endif GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Rint, DT_FLOAT); GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Rint, DT_DOUBLE); diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_select.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_select.cc index 6e3fc61724c74c..bea719a9bbc466 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_select.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_select.cc @@ -27,11 +27,13 @@ GENERATE_AND_REGISTER_TERNARY_GPU_KERNEL(SelectV2, DT_COMPLEX64); GENERATE_AND_REGISTER_TERNARY_GPU_KERNEL(SelectV2, DT_COMPLEX128); // These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_TERNARY_JIT_GPU_KERNEL(SelectV2, DT_INT8) GENERATE_AND_REGISTER_TERNARY_JIT_GPU_KERNEL(SelectV2, DT_INT16) GENERATE_AND_REGISTER_TERNARY_JIT_GPU_KERNEL(SelectV2, DT_UINT8) GENERATE_AND_REGISTER_TERNARY_JIT_GPU_KERNEL(SelectV2, DT_UINT16) GENERATE_AND_REGISTER_TERNARY_JIT_GPU_KERNEL(SelectV2, DT_UINT32) GENERATE_AND_REGISTER_TERNARY_JIT_GPU_KERNEL(SelectV2, DT_UINT64) +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_sign.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_sign.cc index 88e498c0f4d7ff..e4ad9df514a989 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_sign.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_sign.cc @@ -27,7 +27,9 @@ GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Sign, DT_COMPLEX64); GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Sign, DT_COMPLEX128); // These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Sign, DT_INT8); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Sign, DT_INT16); +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_square.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_square.cc index 53e30dedf75a24..e2ba8d8c3352bd 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_square.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_square.cc @@ -24,11 +24,13 @@ GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Square, DT_DOUBLE); GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Square, DT_INT64); // These kernels are JIT-compiled. 
+#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Square, DT_INT8); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Square, DT_INT16); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Square, DT_UINT8); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Square, DT_UINT16); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Square, DT_UINT32); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(Square, DT_UINT64); +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_sub.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_sub.cc index 821af0252b5988..6942194293f890 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_sub.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_sub.cc @@ -33,9 +33,11 @@ GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Sub, DT_COMPLEX64); GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Sub, DT_COMPLEX128); // These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Sub, DT_INT8); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Sub, DT_INT16); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Sub, DT_UINT8); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(Sub, DT_UINT16); +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_truncate_div.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_truncate_div.cc index 1ffaff778e6e6d..66db72542b1c36 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_truncate_div.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_truncate_div.cc @@ -19,8 +19,10 @@ limitations under the License. namespace tensorflow { // These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(TruncateDiv, DT_INT8); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(TruncateDiv, DT_UINT32); GENERATE_AND_REGISTER_BINARY_JIT_GPU_KERNEL(TruncateDiv, DT_UINT64); +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_zeros_like.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_zeros_like.cc index faf3cea92a6a31..9b58a2751c4acd 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_op_zeros_like.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_op_zeros_like.cc @@ -27,11 +27,13 @@ GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(ZerosLike, DT_DOUBLE); // GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(ZerosLike, DT_COMPLEX128); // These kernels are JIT-compiled. +#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(ZerosLike, DT_INT8); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(ZerosLike, DT_INT16); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(ZerosLike, DT_UINT8); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(ZerosLike, DT_UINT16); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(ZerosLike, DT_UINT32); GENERATE_AND_REGISTER_UNARY_JIT_GPU_KERNEL(ZerosLike, DT_UINT64); +#endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/mlir_generated/gpu_unary_ops_test.cc b/tensorflow/core/kernels/mlir_generated/gpu_unary_ops_test.cc index 773ffb82bb9923..56f7c7c1b180b8 100644 --- a/tensorflow/core/kernels/mlir_generated/gpu_unary_ops_test.cc +++ b/tensorflow/core/kernels/mlir_generated/gpu_unary_ops_test.cc @@ -59,7 +59,8 @@ GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES( baseline_abs, test::OpsTestConfig().ExpectStrictlyEqual()) // These kernels are JIT-compiled. 
-#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TEST(Abs, DT_INT8, DT_INT8, baseline_abs, test::OpsTestConfig().ExpectStrictlyEqual()) GENERATE_DEFAULT_TEST(Abs, DT_INT16, DT_INT16, baseline_abs, @@ -784,7 +785,8 @@ GENERATE_DEFAULT_TEST(OnesLike, DT_INT64, DT_INT64, baseline_ones_like, test::OpsTestConfig().ExpectStrictlyEqual()) // These kernels are JIT-compiled. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TEST(OnesLike, DT_INT8, DT_INT8, baseline_ones_like, test::OpsTestConfig().ExpectStrictlyEqual()) GENERATE_DEFAULT_TEST(OnesLike, DT_INT16, DT_INT16, baseline_ones_like, @@ -853,7 +855,8 @@ GENERATE_DEFAULT_TEST_2(Relu, DT_HALF, DT_FLOAT, DT_HALF, DT_FLOAT, baseline_relu, test::OpsTestConfig()) // Test the JIT-compiled kernels. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TEST(Relu, DT_INT8, DT_INT8, baseline_relu, test::OpsTestConfig().ExpectStrictlyEqual()) GENERATE_DEFAULT_TEST(Relu, DT_INT16, DT_INT16, baseline_relu, @@ -879,7 +882,8 @@ T baseline_rint(T x) { } // Test the JIT-compiled kernel. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TEST_2(Rint, DT_HALF, DT_FLOAT, DT_HALF, DT_FLOAT, baseline_rint, test::OpsTestConfig().ExpectStrictlyEqual()) @@ -1033,7 +1037,8 @@ GENERATE_DEFAULT_TEST(Sign, DT_COMPLEX128, DT_COMPLEX128, baseline_sign, test::OpsTestConfig().ExpectStrictlyEqual()) // These kernels are JIT-compiled. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TEST(Sign, DT_INT8, DT_INT8, baseline_sign, test::OpsTestConfig().ExpectStrictlyEqual()) GENERATE_DEFAULT_TEST(Sign, DT_INT16, DT_INT16, baseline_sign, @@ -1171,7 +1176,8 @@ GENERATE_DEFAULT_TEST(Square, DT_INT64, DT_INT64, baseline_square, test::OpsTestConfig().ExpectStrictlyEqual()) // These kernels are JIT-compiled. -#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TEST(Square, DT_INT8, DT_INT8, baseline_square, test::OpsTestConfig().ExpectStrictlyEqual()) GENERATE_DEFAULT_TEST(Square, DT_INT16, DT_INT16, baseline_square, @@ -1205,7 +1211,8 @@ GENERATE_DEFAULT_TEST(ZerosLike, DT_INT64, DT_INT64, baseline_zeros_like, test::OpsTestConfig().ExpectStrictlyEqual()) // These kernels are JIT-compiled. 
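Note on the guard used in the test hunks above and in the one that follows: the JIT-kernel test cases now compile only when both build flags are set. A minimal standalone sketch of that behavior, assuming the flags are passed as ordinary preprocessor defines (e.g. -DMLIR_GENERATED_GPU_KERNELS_ENABLED -DMLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED); only the flag names are taken from the patch, the program itself is illustrative:

#include <cstdio>

int main() {
#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \
    defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
  // Both flags defined: the guarded JIT kernel registrations and tests are built.
  std::puts("experimental MLIR-generated JIT kernels enabled");
#else
  // Either flag missing: the guarded registrations and tests are compiled out.
  std::puts("experimental MLIR-generated JIT kernels compiled out");
#endif
  return 0;
}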
-#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) +#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \ + defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED) GENERATE_DEFAULT_TEST(ZerosLike, DT_INT8, DT_INT8, baseline_zeros_like, test::OpsTestConfig().ExpectStrictlyEqual()) GENERATE_DEFAULT_TEST(ZerosLike, DT_INT16, DT_INT16, baseline_zeros_like, diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc index 24b7e3f4ebdd5a..a007d37c4e290a 100644 --- a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_util.h" #include "tensorflow/core/kernels/stateless_random_ops.h" #include "tensorflow/core/lib/random/random_distributions.h" #include "tensorflow/core/platform/logging.h" @@ -630,20 +631,18 @@ class ParameterizedTruncatedNormalOp : public OpKernel { OP_REQUIRES(ctx, shape_tensor.NumElements() > 0, errors::InvalidArgument("Shape tensor must not be empty, got ", shape_tensor.DebugString())); - int32_t num_batches = shape_tensor.flat()(0); + TensorShape tensor_shape; + OP_REQUIRES_OK(ctx, tensor::MakeShape(shape_tensor, &tensor_shape)); + int32_t num_batches = tensor_shape.dim_size(0); int32_t samples_per_batch = 1; - const int32_t num_dims = shape_tensor.dim_size(0); + const int32_t num_dims = tensor_shape.dims(); for (int32_t i = 1; i < num_dims; i++) { - samples_per_batch *= shape_tensor.flat()(i); + samples_per_batch *= tensor_shape.dim_size(i); } const int32_t num_elements = num_batches * samples_per_batch; // Allocate the output before fudging num_batches and samples_per_batch. - auto shape_vec = shape_tensor.flat(); - TensorShape tensor_shape; - OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape( - shape_vec.data(), shape_vec.size(), &tensor_shape)); Tensor* samples_tensor; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, tensor_shape, &samples_tensor)); diff --git a/tensorflow/core/kernels/pooling_ops_3d.cc b/tensorflow/core/kernels/pooling_ops_3d.cc index d4444b677a9504..661827e537a0b6 100644 --- a/tensorflow/core/kernels/pooling_ops_3d.cc +++ b/tensorflow/core/kernels/pooling_ops_3d.cc @@ -531,7 +531,7 @@ class AvgPooling3dGradOp : public OpKernel { TensorShape output_shape; auto shape_vec = tensor_in_shape.vec(); for (int64_t i = 0; i < tensor_in_shape.NumElements(); ++i) { - output_shape.AddDim(shape_vec(i)); + OP_REQUIRES_OK(context, output_shape.AddDimWithStatus(shape_vec(i))); } Tensor* output; diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.cc b/tensorflow/core/kernels/quantize_and_dequantize_op.cc index d63a49a04be621..ae02b57861ac02 100644 --- a/tensorflow/core/kernels/quantize_and_dequantize_op.cc +++ b/tensorflow/core/kernels/quantize_and_dequantize_op.cc @@ -21,19 +21,23 @@ limitations under the License. 
#define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/quantize_and_dequantize_op.h" - #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/type_traits.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/quantize_and_dequantize_op.h" #include "tensorflow/core/lib/core/errors.h" namespace tensorflow { +namespace { -typedef Eigen::ThreadPoolDevice CPUDevice; -typedef Eigen::GpuDevice GPUDevice; +using CpuDevice = ::Eigen::ThreadPoolDevice; +using GpuDevice = ::Eigen::GpuDevice; +using ::tensorflow::errors::InvalidArgument; + +} // namespace // Simulate quantization precision loss in a float tensor by: // 1. Quantize the tensor to fixed point numbers, which should match the target @@ -49,8 +53,8 @@ class QuantizeAndDequantizeV2Op : public OpKernel { OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &axis_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("num_bits", &num_bits_)); OP_REQUIRES(ctx, num_bits_ > 0 && num_bits_ < (signed_input_ ? 62 : 63), - errors::InvalidArgument("num_bits is out of range: ", num_bits_, - " with signed_input_ ", signed_input_)); + InvalidArgument("num_bits is out of range: ", num_bits_, + " with signed_input_ ", signed_input_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("range_given", &range_given_)); string round_mode_string; @@ -58,10 +62,10 @@ class QuantizeAndDequantizeV2Op : public OpKernel { OP_REQUIRES( ctx, (round_mode_string == "HALF_UP" || round_mode_string == "HALF_TO_EVEN"), - errors::InvalidArgument("Round mode string must be " - "'HALF_UP' or " - "'HALF_TO_EVEN', is '" + - round_mode_string + "'")); + InvalidArgument("Round mode string must be " + "'HALF_UP' or " + "'HALF_TO_EVEN', is '" + + round_mode_string + "'")); if (round_mode_string == "HALF_UP") { round_mode_ = ROUND_HALF_UP; } else if (round_mode_string == "HALF_TO_EVEN") { @@ -72,12 +76,10 @@ class QuantizeAndDequantizeV2Op : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); - OP_REQUIRES( - ctx, axis_ >= -1, - errors::InvalidArgument("Axis must be at least -1. Found ", axis_)); - OP_REQUIRES( - ctx, (axis_ == -1 || axis_ < input.shape().dims()), - errors::InvalidArgument("Shape must be at least rank ", axis_ + 1, + OP_REQUIRES(ctx, axis_ >= -1, + InvalidArgument("Axis must be at least -1. Found ", axis_)); + OP_REQUIRES(ctx, (axis_ == -1 || axis_ < input.shape().dims()), + InvalidArgument("Shape must be at least rank ", axis_ + 1, " but is rank ", input.shape().dims())); const int depth = (axis_ == -1) ? 
1 : input.dim_size(axis_); Tensor input_min_tensor; @@ -91,21 +93,21 @@ class QuantizeAndDequantizeV2Op : public OpKernel { auto min_val = input_min_tensor.scalar()(); auto max_val = input_max_tensor.scalar()(); OP_REQUIRES(ctx, min_val <= max_val, - errors::InvalidArgument("Invalid range: input_min ", - min_val, " > input_max ", max_val)); + InvalidArgument("Invalid range: input_min ", min_val, + " > input_max ", max_val)); } else { - OP_REQUIRES(ctx, input_min_tensor.dim_size(0) == depth, - errors::InvalidArgument( - "input_min_tensor has incorrect size, was ", - input_min_tensor.dim_size(0), " expected ", depth, - " to match dim ", axis_, " of the input ", - input_min_tensor.shape())); - OP_REQUIRES(ctx, input_max_tensor.dim_size(0) == depth, - errors::InvalidArgument( - "input_max_tensor has incorrect size, was ", - input_max_tensor.dim_size(0), " expected ", depth, - " to match dim ", axis_, " of the input ", - input_max_tensor.shape())); + OP_REQUIRES( + ctx, input_min_tensor.dim_size(0) == depth, + InvalidArgument("input_min_tensor has incorrect size, was ", + input_min_tensor.dim_size(0), " expected ", depth, + " to match dim ", axis_, " of the input ", + input_min_tensor.shape())); + OP_REQUIRES( + ctx, input_max_tensor.dim_size(0) == depth, + InvalidArgument("input_max_tensor has incorrect size, was ", + input_max_tensor.dim_size(0), " expected ", depth, + " to match dim ", axis_, " of the input ", + input_max_tensor.shape())); } } else { auto range_shape = (axis_ == -1) ? TensorShape({}) : TensorShape({depth}); @@ -158,38 +160,34 @@ class QuantizeAndDequantizeV4GradientOp : public OpKernel { Tensor* input_backprop = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &input_backprop)); - OP_REQUIRES( - ctx, axis_ >= -1, - errors::InvalidArgument("Axis must be at least -1. Found ", axis_)); + OP_REQUIRES(ctx, axis_ >= -1, + InvalidArgument("Axis must be at least -1. Found ", axis_)); OP_REQUIRES(ctx, (axis_ == -1 || axis_ < input.shape().dims()), - errors::InvalidArgument( + InvalidArgument( "Axis should be -1 or 0 or a positive value less than ", input.shape().dims(), "but given axis value was ", axis_)); - OP_REQUIRES( - ctx, input.IsSameSize(gradient), - errors::InvalidArgument("gradient and input must be the same size")); + OP_REQUIRES(ctx, input.IsSameSize(gradient), + InvalidArgument("gradient and input must be the same size")); const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_); const Tensor& input_min_tensor = ctx->input(2); OP_REQUIRES(ctx, input_min_tensor.dims() == 0 || input_min_tensor.dims() == 1, - errors::InvalidArgument( - "Input min tensor must have dimension 1. Recieved ", + InvalidArgument( + "Input min tensor must have dimension 0 or 1. Received ", input_min_tensor.dims(), ".")); const Tensor& input_max_tensor = ctx->input(3); OP_REQUIRES(ctx, input_max_tensor.dims() == 0 || input_max_tensor.dims() == 1, - errors::InvalidArgument( - "Input max tensor must have dimension 1. Recieved ", + InvalidArgument( + "Input max tensor must have dimension 0 or 1. 
Received ", input_max_tensor.dims(), ".")); if (axis_ != -1) { - OP_REQUIRES( - ctx, input_min_tensor.dim_size(0) == depth, - errors::InvalidArgument("min has incorrect size, expected ", depth, + OP_REQUIRES(ctx, input_min_tensor.dim_size(0) == depth, + InvalidArgument("min has incorrect size, expected ", depth, " was ", input_min_tensor.dim_size(0))); - OP_REQUIRES( - ctx, input_max_tensor.dim_size(0) == depth, - errors::InvalidArgument("max has incorrect size, expected ", depth, + OP_REQUIRES(ctx, input_max_tensor.dim_size(0) == depth, + InvalidArgument("max has incorrect size, expected ", depth, " was ", input_max_tensor.dim_size(0))); } @@ -203,6 +201,12 @@ class QuantizeAndDequantizeV4GradientOp : public OpKernel { ctx->allocate_output(2, min_max_shape, &input_max_backprop)); if (axis_ == -1) { + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_min_tensor.shape()), + InvalidArgument("input_min must be a scalar if axis is unspecified")); + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_max_tensor.shape()), + InvalidArgument("input_max must be a scalar if axis is unspecified")); functor::QuantizeAndDequantizeOneScaleGradientFunctor f; f(ctx->eigen_device(), gradient.template flat(), input.template flat(), input_min_tensor.scalar(), @@ -246,21 +250,25 @@ class QuantizeAndDequantizeV3Op : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); OP_REQUIRES(ctx, axis_ < input.dims(), - errors::InvalidArgument( + InvalidArgument( "Axis requested is larger than input dimensions. Axis: ", axis_, " Input Dimensions: ", input.dims())); const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); - Tensor num_bits_tensor; - num_bits_tensor = ctx->input(3); - int num_bits_val = num_bits_tensor.scalar()(); + // Get num_bits and validate. + const Tensor num_bits_tensor = ctx->input(3); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(num_bits_tensor.shape()), + InvalidArgument("Invalid shape. The `num_bits` tensor should " + "be a scalar. Got dimensions: ", + num_bits_tensor.dims())); - OP_REQUIRES( - ctx, num_bits_val > 0 && num_bits_val < (signed_input_ ? 62 : 63), - errors::InvalidArgument("num_bits is out of range: ", num_bits_val, - " with signed_input_ ", signed_input_)); + const int num_bits_val = num_bits_tensor.scalar()(); + OP_REQUIRES(ctx, + num_bits_val > 0 && num_bits_val < (signed_input_ ? 
62 : 63), + InvalidArgument("num_bits is out of range: ", num_bits_val, + " with `signed_input_` ", signed_input_)); Tensor input_min_tensor; Tensor input_max_tensor; @@ -268,24 +276,24 @@ class QuantizeAndDequantizeV3Op : public OpKernel { input_min_tensor = ctx->input(1); input_max_tensor = ctx->input(2); if (axis_ == -1) { - auto min_val = input_min_tensor.scalar()(); - auto max_val = input_max_tensor.scalar()(); + const auto min_val = input_min_tensor.scalar()(); + const auto max_val = input_max_tensor.scalar()(); OP_REQUIRES(ctx, min_val <= max_val, - errors::InvalidArgument("Invalid range: input_min ", - min_val, " > input_max ", max_val)); + InvalidArgument("Invalid range: input_min ", min_val, + " > input_max ", max_val)); } else { - OP_REQUIRES(ctx, input_min_tensor.dim_size(0) == depth, - errors::InvalidArgument( - "input_min_tensor has incorrect size, was ", - input_min_tensor.dim_size(0), " expected ", depth, - " to match dim ", axis_, " of the input ", - input_min_tensor.shape())); - OP_REQUIRES(ctx, input_max_tensor.dim_size(0) == depth, - errors::InvalidArgument( - "input_max_tensor has incorrect size, was ", - input_max_tensor.dim_size(0), " expected ", depth, - " to match dim ", axis_, " of the input ", - input_max_tensor.shape())); + OP_REQUIRES( + ctx, input_min_tensor.dim_size(0) == depth, + InvalidArgument("input_min_tensor has incorrect size, was ", + input_min_tensor.dim_size(0), " expected ", depth, + " to match dim ", axis_, " of the input ", + input_min_tensor.shape())); + OP_REQUIRES( + ctx, input_max_tensor.dim_size(0) == depth, + InvalidArgument("input_max_tensor has incorrect size, was ", + input_max_tensor.dim_size(0), " expected ", depth, + " to match dim ", axis_, " of the input ", + input_max_tensor.shape())); } } else { auto range_shape = (axis_ == -1) ? TensorShape({}) : TensorShape({depth}); @@ -325,15 +333,14 @@ class QuantizeAndDequantizeOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->GetAttr("signed_input", &signed_input_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("num_bits", &num_bits_)); OP_REQUIRES(ctx, num_bits_ > 0 && num_bits_ < (signed_input_ ? 62 : 63), - errors::InvalidArgument("num_bits is out of range: ", num_bits_, - " with signed_input_ ", signed_input_)); + InvalidArgument("num_bits is out of range: ", num_bits_, + " with signed_input_ ", signed_input_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("range_given", &range_given_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("input_min", &input_min_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("input_max", &input_max_)); if (range_given_) { - OP_REQUIRES( - ctx, input_min_ <= input_max_, - errors::InvalidArgument("Invalid range: input_min ", input_min_, + OP_REQUIRES(ctx, input_min_ <= input_max_, + InvalidArgument("Invalid range: input_min ", input_min_, " > input_max ", input_max_)); } } @@ -365,53 +372,53 @@ class QuantizeAndDequantizeOp : public OpKernel { float input_max_; }; -// Specializations for CPUDevice. +// Specializations for CpuDevice. 
namespace functor { template -struct QuantizeAndDequantizeOneScaleFunctor { - void operator()(const CPUDevice& d, typename TTypes::ConstVec input, +struct QuantizeAndDequantizeOneScaleFunctor { + void operator()(const CpuDevice& d, typename TTypes::ConstVec input, const bool signed_input, const int num_bits, const bool range_given, Tensor* input_min_tensor, Tensor* input_max_tensor, QuantizerRoundMode round_mode, bool narrow_range, typename TTypes::Vec out) { - QuantizeAndDequantizeOneScaleImpl::Compute( + QuantizeAndDequantizeOneScaleImpl::Compute( d, input, signed_input, num_bits, range_given, input_min_tensor, input_max_tensor, round_mode, narrow_range, out); } }; template -struct QuantizeAndDequantizePerChannelFunctor { - void operator()(const CPUDevice& d, typename TTypes::ConstTensor input, +struct QuantizeAndDequantizePerChannelFunctor { + void operator()(const CpuDevice& d, typename TTypes::ConstTensor input, bool signed_input, int num_bits, bool range_given, Tensor* input_min_tensor, Tensor* input_max_tensor, QuantizerRoundMode round_mode, bool narrow_range, typename TTypes::Tensor out) { - QuantizeAndDequantizePerChannelImpl::Compute( + QuantizeAndDequantizePerChannelImpl::Compute( d, input, signed_input, num_bits, range_given, input_min_tensor, input_max_tensor, round_mode, narrow_range, out); } }; template -struct QuantizeAndDequantizeOneScaleGradientFunctor { - void operator()(const CPUDevice& d, typename TTypes::ConstFlat gradient, +struct QuantizeAndDequantizeOneScaleGradientFunctor { + void operator()(const CpuDevice& d, typename TTypes::ConstFlat gradient, typename TTypes::ConstFlat input, typename TTypes::ConstScalar input_min_tensor, typename TTypes::ConstScalar input_max_tensor, typename TTypes::Flat input_backprop, typename TTypes::Scalar input_min_backprop, typename TTypes::Scalar input_max_backprop) { - QuantizeAndDequantizeOneScaleGradientImpl::Compute( + QuantizeAndDequantizeOneScaleGradientImpl::Compute( d, gradient, input, input_min_tensor, input_max_tensor, input_backprop, input_min_backprop, input_max_backprop); } }; template -struct QuantizeAndDequantizePerChannelGradientFunctor { - void operator()(const CPUDevice& d, +struct QuantizeAndDequantizePerChannelGradientFunctor { + void operator()(const CpuDevice& d, typename TTypes::ConstTensor gradient, typename TTypes::ConstTensor input, const Tensor* input_min_tensor, @@ -419,16 +426,16 @@ struct QuantizeAndDequantizePerChannelGradientFunctor { typename TTypes::Tensor input_backprop, typename TTypes::Flat input_min_backprop, typename TTypes::Flat input_max_backprop) { - QuantizeAndDequantizePerChannelGradientImpl::Compute( + QuantizeAndDequantizePerChannelGradientImpl::Compute( d, gradient, input, input_min_tensor, input_max_tensor, input_backprop, input_min_backprop, input_max_backprop); } }; -template struct functor::QuantizeAndDequantizeOneScaleGradientFunctor; template struct functor::QuantizeAndDequantizePerChannelGradientFunctor< - CPUDevice, double>; + CpuDevice, double>; } // namespace functor @@ -436,22 +443,22 @@ template struct functor::QuantizeAndDequantizePerChannelGradientFunctor< REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV2") \ .Device(DEVICE_CPU) \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV2Op); \ + QuantizeAndDequantizeV2Op); \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV3") \ .Device(DEVICE_CPU) \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV3Op); \ + QuantizeAndDequantizeV3Op); \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV4") \ .Device(DEVICE_CPU) \ 
.TypeConstraint("T"), \ - QuantizeAndDequantizeV2Op); \ + QuantizeAndDequantizeV2Op); \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV4Grad") \ .Device(DEVICE_CPU) \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV4GradientOp); \ + QuantizeAndDequantizeV4GradientOp); \ REGISTER_KERNEL_BUILDER( \ Name("QuantizeAndDequantize").Device(DEVICE_CPU).TypeConstraint("T"), \ - QuantizeAndDequantizeOp); + QuantizeAndDequantizeOp); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -464,29 +471,29 @@ TF_CALL_double(REGISTER_CPU_KERNEL); .HostMemory("input_min") \ .HostMemory("input_max") \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV2Op); \ + QuantizeAndDequantizeV2Op); \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV3") \ .Device(DEVICE_GPU) \ .HostMemory("input_min") \ .HostMemory("input_max") \ .HostMemory("num_bits") \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV3Op); \ + QuantizeAndDequantizeV3Op); \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV4") \ .Device(DEVICE_GPU) \ .HostMemory("input_min") \ .HostMemory("input_max") \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV2Op); \ + QuantizeAndDequantizeV2Op); \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV4Grad") \ .Device(DEVICE_GPU) \ .HostMemory("input_min") \ .HostMemory("input_max") \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV4GradientOp); \ + QuantizeAndDequantizeV4GradientOp); \ REGISTER_KERNEL_BUILDER( \ Name("QuantizeAndDequantize").Device(DEVICE_GPU).TypeConstraint("T"), \ - QuantizeAndDequantizeOp); + QuantizeAndDequantizeOp); TF_CALL_float(REGISTER_GPU_KERNEL); TF_CALL_double(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc index 1b948c8108de87..83f8996b4cc746 100644 --- a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc +++ b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc @@ -40,8 +40,20 @@ class QuantizeDownAndShrinkRangeOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); - const float input_min_float = ctx->input(1).flat()(0); - const float input_max_float = ctx->input(2).flat()(0); + const Tensor& input_min = ctx->input(1); + const Tensor& input_max = ctx->input(2); + + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_min.shape()), + errors::InvalidArgument("`input_min` must be rank 0 but is rank ", + input_min.dims())); + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_max.shape()), + errors::InvalidArgument("`input_max` must be rank 0 but is rank ", + input_max.dims())); + + const float input_min_float = input_min.scalar()(); + const float input_max_float = input_max.scalar()(); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); Tensor* output_min = nullptr; diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc b/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc index 48b56ae0ac20c4..0c34a9da7218a8 100644 --- a/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc +++ b/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc @@ -53,8 +53,8 @@ TEST_F(QuantizeDownAndShrinkRangeTest, HandCrafted) { const int value_count = 3; AddInputFromArray(TensorShape({value_count}), {-(1 << 23), 0, (1 << 23)}); - AddInputFromArray(TensorShape({1}), {-256.0f}); - AddInputFromArray(TensorShape({1}), {256.0f}); + 
AddInputFromArray(TensorShape({}), {-256.0f}); + AddInputFromArray(TensorShape({}), {256.0f}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_QUINT8, TensorShape({value_count})); test::FillValues(&expected, {0, 127, 255}); diff --git a/tensorflow/core/kernels/quantized_activation_ops.cc b/tensorflow/core/kernels/quantized_activation_ops.cc index 2896c3d45a7023..36d321a8e17138 100644 --- a/tensorflow/core/kernels/quantized_activation_ops.cc +++ b/tensorflow/core/kernels/quantized_activation_ops.cc @@ -32,8 +32,21 @@ class QuantizedReluOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); - const float min_input = context->input(1).flat()(0); - const float max_input = context->input(2).flat()(0); + const Tensor& min_input_tensor = context->input(1); + const Tensor& max_input_tensor = context->input(2); + + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(min_input_tensor.shape()), + errors::InvalidArgument("`min_input` must be rank 0 but is rank ", + min_input_tensor.dims())); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(max_input_tensor.shape()), + errors::InvalidArgument("`max_input` must be rank 0 but is rank ", + max_input_tensor.dims())); + + const float min_input = min_input_tensor.scalar()(); + const float max_input = max_input_tensor.scalar()(); + Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); @@ -65,8 +78,21 @@ class QuantizedRelu6Op : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); - const float min_input = context->input(1).flat()(0); - const float max_input = context->input(2).flat()(0); + const Tensor& min_input_tensor = context->input(1); + const Tensor& max_input_tensor = context->input(2); + + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(min_input_tensor.shape()), + errors::InvalidArgument("`min_input` must be rank 0 but is rank ", + min_input_tensor.dims())); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(max_input_tensor.shape()), + errors::InvalidArgument("`max_input` must be rank 0 but is rank ", + max_input_tensor.dims())); + + const float min_input = min_input_tensor.scalar()(); + const float max_input = max_input_tensor.scalar()(); + Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); diff --git a/tensorflow/core/kernels/quantized_activation_ops_test.cc b/tensorflow/core/kernels/quantized_activation_ops_test.cc index b3b7cb58b9a455..34c5130f4759b5 100644 --- a/tensorflow/core/kernels/quantized_activation_ops_test.cc +++ b/tensorflow/core/kernels/quantized_activation_ops_test.cc @@ -55,8 +55,8 @@ TEST_F(QuantizedActivationsTest, TestRelu) { AddInputFromArray(input_quantized.shape(), input_quantized.flat()); - AddInputFromArray(TensorShape({1}), {input_min}); - AddInputFromArray(TensorShape({1}), {input_max}); + AddInputFromArray(TensorShape({}), {input_min}); + AddInputFromArray(TensorShape({}), {input_max}); TF_ASSERT_OK(RunOpKernel()); const Tensor& output_quantized = *GetOutput(0); const float output_min = GetOutput(1)->flat()(0); @@ -86,8 +86,8 @@ TEST_F(QuantizedActivationsTest, TestRelu6) { AddInputFromArray(input_quantized.shape(), input_quantized.flat()); - AddInputFromArray(TensorShape({1}), {input_min}); - AddInputFromArray(TensorShape({1}), {input_max}); + AddInputFromArray(TensorShape({}), {input_min}); + AddInputFromArray(TensorShape({}), {input_max}); TF_ASSERT_OK(RunOpKernel()); const Tensor& 
output_quantized = *GetOutput(0); const float output_min = GetOutput(1)->flat()(0); diff --git a/tensorflow/core/kernels/quantized_add_op.cc b/tensorflow/core/kernels/quantized_add_op.cc index 1f9897b9b61cd7..5cf7ed1456034e 100644 --- a/tensorflow/core/kernels/quantized_add_op.cc +++ b/tensorflow/core/kernels/quantized_add_op.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/lib/core/errors.h" @@ -457,10 +458,28 @@ class QuantizedAddOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& x = context->input(0); const Tensor& y = context->input(1); - const float min_x = context->input(2).flat()(0); - const float max_x = context->input(3).flat()(0); - const float min_y = context->input(4).flat()(0); - const float max_y = context->input(5).flat()(0); + const Tensor& min_x_tensor = context->input(2); + const Tensor& max_x_tensor = context->input(3); + const Tensor& min_y_tensor = context->input(4); + const Tensor& max_y_tensor = context->input(5); + + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_x_tensor.shape()), + errors::InvalidArgument("`min_x` must be rank 0 but is rank ", + min_x_tensor.dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_x_tensor.shape()), + errors::InvalidArgument("`max_x` must be rank 0 but is rank ", + max_x_tensor.dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_y_tensor.shape()), + errors::InvalidArgument("`min_y` must be rank 0 but is rank ", + min_y_tensor.dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_y_tensor.shape()), + errors::InvalidArgument("`max_y` must be rank 0 but is rank ", + max_y_tensor.dims())); + + const float min_x = min_x_tensor.scalar()(); + const float max_x = max_x_tensor.scalar()(); + const float min_y = min_y_tensor.scalar()(); + const float max_y = max_y_tensor.scalar()(); BCast bcast(BCast::FromShape(x.shape()), BCast::FromShape(y.shape())); if (!bcast.IsValid()) { diff --git a/tensorflow/core/kernels/quantized_bias_add_op.cc b/tensorflow/core/kernels/quantized_bias_add_op.cc index db0e21a498011d..c064f9b1b21e25 100644 --- a/tensorflow/core/kernels/quantized_bias_add_op.cc +++ b/tensorflow/core/kernels/quantized_bias_add_op.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/kernels/quantization_utils.h" @@ -38,10 +39,30 @@ class QuantizedBiasAddOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); const Tensor& bias = context->input(1); - const float input_min = context->input(2).flat()(0); - const float input_max = context->input(3).flat()(0); - const float bias_min = context->input(4).flat()(0); - const float bias_max = context->input(5).flat()(0); + + const Tensor& min_input = context->input(2); + const Tensor& max_input = context->input(3); + const Tensor& min_bias = context->input(4); + const Tensor& max_bias = context->input(5); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(min_input.shape()), + errors::InvalidArgument("`min_input` must be rank 0 but is rank ", + min_input.dims())); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(max_input.shape()), + errors::InvalidArgument("`max_input` must be rank 0 but is rank ", + max_input.dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_bias.shape()), + errors::InvalidArgument( + "`min_bias` must be rank 0 but is rank ", min_bias.dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_bias.shape()), + errors::InvalidArgument( + "`max_bias` must be rank 0 but is rank ", max_bias.dims())); + + const float input_min = min_input.flat()(0); + const float input_max = max_input.flat()(0); + const float bias_min = min_bias.flat()(0); + const float bias_max = max_bias.flat()(0); OP_REQUIRES(context, TensorShapeUtils::IsMatrixOrHigher(input.shape()), errors::InvalidArgument("Input tensor must be at least 2D: ", diff --git a/tensorflow/core/kernels/quantized_bias_add_op_test.cc b/tensorflow/core/kernels/quantized_bias_add_op_test.cc index 7b99ceafe261b7..edfae98efa953b 100644 --- a/tensorflow/core/kernels/quantized_bias_add_op_test.cc +++ b/tensorflow/core/kernels/quantized_bias_add_op_test.cc @@ -74,10 +74,10 @@ TEST_F(QuantizedBiasAddTest, Small) { input_quantized.flat()); AddInputFromArray(bias_quantized.shape(), bias_quantized.flat()); - AddInputFromArray(TensorShape({1}), {input_min}); - AddInputFromArray(TensorShape({1}), {input_max}); - AddInputFromArray(TensorShape({1}), {bias_min}); - AddInputFromArray(TensorShape({1}), {bias_max}); + AddInputFromArray(TensorShape({}), {input_min}); + AddInputFromArray(TensorShape({}), {input_max}); + AddInputFromArray(TensorShape({}), {bias_min}); + AddInputFromArray(TensorShape({}), {bias_max}); TF_ASSERT_OK(RunOpKernel()); const Tensor& output_quantized = *GetOutput(0); const float output_min = GetOutput(1)->flat()(0); @@ -156,10 +156,10 @@ TEST_F(QuantizedBiasAddTest, RealData) { input_quantized.flat()); AddInputFromArray(bias_quantized.shape(), bias_quantized.flat()); - AddInputFromArray(TensorShape({1}), {input_min}); - AddInputFromArray(TensorShape({1}), {input_max}); - AddInputFromArray(TensorShape({1}), {bias_min}); - AddInputFromArray(TensorShape({1}), {bias_max}); + AddInputFromArray(TensorShape({}), {input_min}); + AddInputFromArray(TensorShape({}), {input_max}); + AddInputFromArray(TensorShape({}), {bias_min}); + AddInputFromArray(TensorShape({}), {bias_max}); TF_ASSERT_OK(RunOpKernel()); const Tensor& output_quantized = *GetOutput(0); const float output_min = 
GetOutput(1)->flat()(0); diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc index 39824ab2546d33..0519256b613be6 100644 --- a/tensorflow/core/kernels/quantized_conv_ops.cc +++ b/tensorflow/core/kernels/quantized_conv_ops.cc @@ -18,8 +18,6 @@ limitations under the License. #include #include -#include "tensorflow/core/platform/errors.h" - #define EIGEN_USE_THREADS #define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK @@ -32,6 +30,7 @@ limitations under the License. #include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/kernels/reference_gemm.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/util/padding.h" namespace tensorflow { @@ -499,11 +498,26 @@ class QuantizedConv2DOp : public OpKernel { // For 2D convolution, there should be 4 dimensions. OP_REQUIRES(context, input.dims() == 4, - errors::InvalidArgument("input must be 4-dimensional", - input.shape().DebugString())); + errors::InvalidArgument("input must be rank 4 but is rank ", + input.shape().dims())); OP_REQUIRES(context, filter.dims() == 4, - errors::InvalidArgument("filter must be 4-dimensional: ", - filter.shape().DebugString())); + errors::InvalidArgument("filter must be rank 4 but is rank ", + filter.shape().dims())); + + OP_REQUIRES(context, TensorShapeUtils::IsScalar(context->input(2).shape()), + errors::InvalidArgument("min_input must be rank 0 but is rank ", + context->input(2).shape().dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(context->input(3).shape()), + errors::InvalidArgument("max_input must be rank 0 but is rank ", + context->input(3).shape().dims())); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(context->input(4).shape()), + errors::InvalidArgument("min_filter must be rank 0 but is rank ", + context->input(4).shape().dims())); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(context->input(5).shape()), + errors::InvalidArgument("max_filter must be rank 0 but is rank ", + context->input(5).shape().dims())); const float min_input = context->input(2).flat()(0); const float max_input = context->input(3).flat()(0); diff --git a/tensorflow/core/kernels/quantized_conv_ops_test.cc b/tensorflow/core/kernels/quantized_conv_ops_test.cc index 4226378bb64683..dd0878a36df7b0 100644 --- a/tensorflow/core/kernels/quantized_conv_ops_test.cc +++ b/tensorflow/core/kernels/quantized_conv_ops_test.cc @@ -91,10 +91,10 @@ TEST_F(QuantizedConv2DTest, Small) { image_quantized.flat()); AddInputFromArray(filter_quantized.shape(), filter_quantized.flat()); - AddInputFromArray(TensorShape({1}), {image_min}); - AddInputFromArray(TensorShape({1}), {image_max}); - AddInputFromArray(TensorShape({1}), {filter_min}); - AddInputFromArray(TensorShape({1}), {filter_max}); + AddInputFromArray(TensorShape({}), {image_min}); + AddInputFromArray(TensorShape({}), {image_max}); + AddInputFromArray(TensorShape({}), {filter_min}); + AddInputFromArray(TensorShape({}), {filter_max}); TF_ASSERT_OK(RunOpKernel()); // We're sliding the 3x3 filter across the 3x4 image, with accesses outside @@ -158,10 +158,10 @@ TEST_F(QuantizedConv2DTest, Small32Bit) { AddInputFromArray( TensorShape({filter_size, filter_size, depth, filter_count}), {10, 40, 70, 20, 50, 80, 30, 60, 90}); - AddInputFromArray(TensorShape({1}), {0}); - AddInputFromArray(TensorShape({1}), {255.0f}); - AddInputFromArray(TensorShape({1}), {0}); - AddInputFromArray(TensorShape({1}), {255.0f}); + AddInputFromArray(TensorShape({}), {0}); + 
AddInputFromArray(TensorShape({}), {255.0f}); + AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {255.0f}); TF_ASSERT_OK(RunOpKernel()); const int expected_width = image_width; @@ -201,10 +201,10 @@ TEST_F(QuantizedConv2DTest, OddPadding) { AddInputFromArray( TensorShape({filter_size, filter_size, depth, filter_count}), {1, 2, 3, 4, 5, 6, 7, 8, 9}); - AddInputFromArray(TensorShape({1}), {0}); - AddInputFromArray(TensorShape({1}), {255.0f}); - AddInputFromArray(TensorShape({1}), {0}); - AddInputFromArray(TensorShape({1}), {255.0f}); + AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {255.0f}); + AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {255.0f}); TF_ASSERT_OK(RunOpKernel()); const int expected_width = image_width / stride; @@ -244,10 +244,10 @@ TEST_F(QuantizedConv2DTest, OddPaddingBatch) { AddInputFromArray( TensorShape({filter_size, filter_size, depth, filter_count}), {1, 2, 3, 4, 5, 6, 7, 8, 9}); - AddInputFromArray(TensorShape({1}), {0}); - AddInputFromArray(TensorShape({1}), {255.0f}); - AddInputFromArray(TensorShape({1}), {0}); - AddInputFromArray(TensorShape({1}), {255.0f}); + AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {255.0f}); + AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {255.0f}); TF_ASSERT_OK(RunOpKernel()); const int expected_width = image_width / stride; @@ -302,10 +302,10 @@ TEST_F(QuantizedConv2DTest, SmallWithNoZero) { image_quantized.flat()); AddInputFromArray(filter_quantized.shape(), filter_quantized.flat()); - AddInputFromArray(TensorShape({1}), {image_min}); - AddInputFromArray(TensorShape({1}), {image_max}); - AddInputFromArray(TensorShape({1}), {filter_min}); - AddInputFromArray(TensorShape({1}), {filter_max}); + AddInputFromArray(TensorShape({}), {image_min}); + AddInputFromArray(TensorShape({}), {image_max}); + AddInputFromArray(TensorShape({}), {filter_min}); + AddInputFromArray(TensorShape({}), {filter_max}); TF_ASSERT_OK(RunOpKernel()); const int expected_width = image_width; const int expected_height = image_height * filter_count; diff --git a/tensorflow/core/kernels/quantized_instance_norm.cc b/tensorflow/core/kernels/quantized_instance_norm.cc index d62094cc9fad85..6cc2ad55e430cc 100644 --- a/tensorflow/core/kernels/quantized_instance_norm.cc +++ b/tensorflow/core/kernels/quantized_instance_norm.cc @@ -25,7 +25,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" - +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/quantization_utils.h" #ifdef USE_NEON @@ -274,8 +274,16 @@ class QuantizedInstanceNorm : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); - float input_min = context->input(1).flat()(0); - float input_max = context->input(2).flat()(0); + const Tensor& x_min = context->input(1); + const Tensor& x_max = context->input(2); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(x_min.shape()), + errors::InvalidArgument("`x_min` must be rank 0 but is rank ", + x_min.dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(x_max.shape()), + errors::InvalidArgument("`x_max` must be rank 0 but is rank ", + x_max.dims())); + float input_min = x_min.scalar()(); + float input_max = x_max.scalar()(); float input_scale = (input_max - input_min) / 255.0f; OP_REQUIRES(context, input_min < input_max, diff --git a/tensorflow/core/kernels/quantized_matmul_op.cc b/tensorflow/core/kernels/quantized_matmul_op.cc index 9d3b5279e4bb82..ae65dc3b5e38ce 100644 --- a/tensorflow/core/kernels/quantized_matmul_op.cc +++ b/tensorflow/core/kernels/quantized_matmul_op.cc @@ -20,11 +20,14 @@ limitations under the License. #define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK #include "public/gemmlowp.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/kernels/reference_gemm.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -75,9 +78,21 @@ class QuantizedMatMulOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& a = context->input(0); const Tensor& b = context->input(1); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(context->input(2).shape()), + errors::InvalidArgument("min_a must be a scalar, but got shape", + context->input(2).shape())); const float min_a = context->input(2).flat()(0); + OP_REQUIRES(context, context->input(3).NumElements() == 1, + errors::InvalidArgument("max_a must be a scalar, but got shape", + context->input(3).shape())); const float max_a = context->input(3).flat()(0); + OP_REQUIRES(context, context->input(4).NumElements() == 1, + errors::InvalidArgument("min_b must be a scalar, but got shape", + context->input(4).shape())); const float min_b = context->input(4).flat()(0); + OP_REQUIRES(context, context->input(5).NumElements() == 1, + errors::InvalidArgument("max_b must be a scalar, but got shape", + context->input(5).shape())); const float max_b = context->input(5).flat()(0); // Make sure that we have valid quantization ranges for the input buffers. 
diff --git a/tensorflow/core/kernels/quantized_matmul_op_test.cc b/tensorflow/core/kernels/quantized_matmul_op_test.cc index c9f05dbc10bb8b..f562a2ebcb744f 100644 --- a/tensorflow/core/kernels/quantized_matmul_op_test.cc +++ b/tensorflow/core/kernels/quantized_matmul_op_test.cc @@ -62,10 +62,10 @@ TEST_F(QuantizedMatMulTest, Small_NoParams) { // | 15 | 16 | 17 | 18 | AddInputFromArray(TensorShape({3, 4}), {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}); - AddInputFromArray(TensorShape({1}), {0}); - AddInputFromArray(TensorShape({1}), {255.0f}); - AddInputFromArray(TensorShape({1}), {0}); - AddInputFromArray(TensorShape({1}), {255.0f}); + AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {255.0f}); + AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {255.0f}); TF_ASSERT_OK(RunOpKernel()); // Here are the results we expect, from hand calculations: @@ -118,10 +118,10 @@ TEST_F(QuantizedMatMulTest, VerySmall_WithParams) { // The B matrix is: // | 1 | AddInputFromArray(TensorShape({b_rows, b_cols}), {0}); - AddInputFromArray(TensorShape({1}), {-12.0f}); - AddInputFromArray(TensorShape({1}), {243.0f}); - AddInputFromArray(TensorShape({1}), {1.0f}); - AddInputFromArray(TensorShape({1}), {256.0f}); + AddInputFromArray(TensorShape({}), {-12.0f}); + AddInputFromArray(TensorShape({}), {243.0f}); + AddInputFromArray(TensorShape({}), {1.0f}); + AddInputFromArray(TensorShape({}), {256.0f}); TF_ASSERT_OK(RunOpKernel()); // We're requesting C = A.transposed() * B, // so we expect to get these results: @@ -162,12 +162,50 @@ TEST_F(QuantizedMatMulTest, VerySmall_BadRange) { // The B matrix is: // | 1 | AddInputFromArray(TensorShape({b_rows, b_cols}), {0}); - AddInputFromArray(TensorShape({1}), {-12.0f}); - AddInputFromArray(TensorShape({1}), {243.0f}); + AddInputFromArray(TensorShape({}), {-12.0f}); + AddInputFromArray(TensorShape({}), {243.0f}); // Here we set the range so that the min and max are equal, so we expect to // see an error when we run. - AddInputFromArray(TensorShape({1}), {1.0f}); - AddInputFromArray(TensorShape({1}), {1.0f}); + AddInputFromArray(TensorShape({}), {1.0f}); + AddInputFromArray(TensorShape({}), {1.0f}); + EXPECT_EQ(::tensorflow::error::INVALID_ARGUMENT, RunOpKernel().code()); +} + +// This test multiplies two 1x1 8bit matrices, but sets invalid quantized min +// and max values, so we expect to get an error +TEST_F(QuantizedMatMulTest, VerySmall_BadMinMax) { + // These parameters reflect a typical production usage of eight-bit matmuls + // in an Inception-style network. + const bool transpose_a = true; + const int a_rows = 1; + const int a_cols = 1; + const int b_rows = 1; + const int b_cols = 1; + const bool transpose_b = false; + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum::v()) + .Attr("transpose_a", transpose_a) + .Attr("transpose_b", transpose_b) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + // The A matrix is: + // | -1 | + AddInputFromArray(TensorShape({a_rows, a_cols}), {11}); + // The B matrix is: + // | 1 | + AddInputFromArray(TensorShape({b_rows, b_cols}), {0}); + // Here we set the error of a non scalar min_a value, so we expect to see an + // error when we run. 
+ AddInputFromArray(TensorShape({1}), {2}); + AddInputFromArray(TensorShape({}), {243.0f}); + AddInputFromArray(TensorShape({}), {1.0f}); + AddInputFromArray(TensorShape({}), {256.0f}); EXPECT_EQ(::tensorflow::error::INVALID_ARGUMENT, RunOpKernel().code()); } @@ -233,10 +271,10 @@ TEST_F(QuantizedMatMulTest, Small_WithParams) { 3, 6, }); - AddInputFromArray(TensorShape({1}), {-12.0f}); - AddInputFromArray(TensorShape({1}), {243.0f}); - AddInputFromArray(TensorShape({1}), {0}); - AddInputFromArray(TensorShape({1}), {255.0f}); + AddInputFromArray(TensorShape({}), {-12.0f}); + AddInputFromArray(TensorShape({}), {243.0f}); + AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {255.0f}); TF_ASSERT_OK(RunOpKernel()); // We're requesting C = A.transposed() * B, // so we expect to get these results: @@ -326,10 +364,10 @@ TEST_F(QuantizedMatMulTest, Medium_WithParams) { AddInputFromArray(a_quantized.shape(), a_quantized.flat()); AddInputFromArray(b_quantized.shape(), b_quantized.flat()); - AddInputFromArray(TensorShape({1}), {a_min}); - AddInputFromArray(TensorShape({1}), {a_max}); - AddInputFromArray(TensorShape({1}), {b_min}); - AddInputFromArray(TensorShape({1}), {b_max}); + AddInputFromArray(TensorShape({}), {a_min}); + AddInputFromArray(TensorShape({}), {a_max}); + AddInputFromArray(TensorShape({}), {b_min}); + AddInputFromArray(TensorShape({}), {b_max}); TF_ASSERT_OK(RunOpKernel()); Tensor expected_float(DT_FLOAT, {a_cols, b_cols}); diff --git a/tensorflow/core/kernels/quantized_pooling_ops.cc b/tensorflow/core/kernels/quantized_pooling_ops.cc index b512369b3c4dd9..5673fb6ee00a5b 100644 --- a/tensorflow/core/kernels/quantized_pooling_ops.cc +++ b/tensorflow/core/kernels/quantized_pooling_ops.cc @@ -15,18 +15,18 @@ limitations under the License. // See docs in ../ops/nn_ops.cc. 
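The same hardening idiom recurs throughout the quantized activation, bias-add, matmul, instance-norm, and pooling hunks: validate that a min/max input is a rank-0 tensor before dereferencing it as a scalar. A sketch of that idiom as a hypothetical helper (the helper itself is not part of the patch; OP_REQUIRES, TensorShapeUtils::IsScalar, and errors::InvalidArgument are the APIs the hunks actually use):

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/lib/core/errors.h"

namespace tensorflow {

// Hypothetical helper (not in the patch): fail with InvalidArgument if input
// `index` is not a rank-0 tensor, mirroring the checks added in these hunks.
inline void CheckScalarInput(OpKernelContext* context, int index,
                             const char* name) {
  const Tensor& t = context->input(index);
  OP_REQUIRES(context, TensorShapeUtils::IsScalar(t.shape()),
              errors::InvalidArgument("`", name,
                                      "` must be rank 0 but is rank ",
                                      t.dims()));
}

}  // namespace tensorflow

// Typical use inside Compute(), matching the pattern in the hunks:
//   CheckScalarInput(context, 1, "min_input");
//   if (!context->status().ok()) return;
//   const float min_input = context->input(1).scalar<float>()();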
-#include "tensorflow/core/framework/op_requires.h" -#include "tensorflow/core/platform/errors.h" #define EIGEN_USE_THREADS #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/kernels/pooling_ops_common.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" @@ -67,8 +67,20 @@ class QuantizedAvgPoolingOp : public OpKernel { return; } - const float min_input = context->input(1).flat()(0); - const float max_input = context->input(2).flat()(0); + const Tensor& min_input_tensor = context->input(1); + const Tensor& max_input_tensor = context->input(2); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_input_tensor.shape()), + errors::InvalidArgument( + "min_input shape must be rank 0 but is rank ", + min_input_tensor.dims(), + ", received shape: ", min_input_tensor.shape())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_input_tensor.shape()), + errors::InvalidArgument( + "max_input shape must be rank 0 but is rank ", + max_input_tensor.dims(), + ", received shape: ", max_input_tensor.shape())); + const float min_input = context->input(1).scalar()(); + const float max_input = context->input(2).scalar()(); OP_REQUIRES(context, params.depth_window == 1, errors::Unimplemented("Non-spatial pooling is not " @@ -119,20 +131,20 @@ class QuantizedMaxPoolingOp : public MaxPoolingOp { : MaxPoolingOp(context) {} void Compute(OpKernelContext* context) override { - auto min_input_tensor = context->input(1); - auto max_input_tensor = context->input(2); - OP_REQUIRES( - context, min_input_tensor.NumElements() == 1, - errors::InvalidArgument( - "min_input must be a scalar float value, got tensor with shape ", - min_input_tensor.shape())); - OP_REQUIRES( - context, max_input_tensor.NumElements() == 1, - errors::InvalidArgument( - "max_input must be a scalar float value, got tensor with shape ", - max_input_tensor.shape())); - const float min_input = context->input(1).flat()(0); - const float max_input = context->input(2).flat()(0); + const Tensor& min_input_tensor = context->input(1); + const Tensor& max_input_tensor = context->input(2); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_input_tensor.shape()), + errors::InvalidArgument( + "min_input shape must be rank 0 but is rank ", + min_input_tensor.dims(), + ", received shape: ", min_input_tensor.shape())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_input_tensor.shape()), + errors::InvalidArgument( + "max_input shape must be rank 0 but is rank ", + max_input_tensor.dims(), + ", received shape: ", max_input_tensor.shape())); + const float min_input = context->input(1).scalar()(); + const float max_input = context->input(2).scalar()(); MaxPoolingOp::Compute(context); Tensor* output_min = nullptr; OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); diff --git a/tensorflow/core/kernels/quantized_pooling_ops_test.cc b/tensorflow/core/kernels/quantized_pooling_ops_test.cc index fc0417e5431b27..9e56890478be24 100644 --- a/tensorflow/core/kernels/quantized_pooling_ops_test.cc +++ 
b/tensorflow/core/kernels/quantized_pooling_ops_test.cc @@ -69,8 +69,8 @@ TEST_F(QuantizedPoolingTest, SmallAveragePooling) { AddInputFromArray(input_quantized.shape(), input_quantized.flat()); - AddInputFromArray(TensorShape({1}), {input_min}); - AddInputFromArray(TensorShape({1}), {input_max}); + AddInputFromArray(TensorShape({}), {input_min}); + AddInputFromArray(TensorShape({}), {input_max}); TF_ASSERT_OK(RunOpKernel()); const Tensor& output_quantized = *GetOutput(0); const float output_min = GetOutput(1)->flat()(0); @@ -114,8 +114,8 @@ TEST_F(QuantizedPoolingTest, SmallMaxPooling) { AddInputFromArray(input_quantized.shape(), input_quantized.flat()); - AddInputFromArray(TensorShape({1}), {input_min}); - AddInputFromArray(TensorShape({1}), {input_max}); + AddInputFromArray(TensorShape({}), {input_min}); + AddInputFromArray(TensorShape({}), {input_max}); TF_ASSERT_OK(RunOpKernel()); const Tensor& output_quantized = *GetOutput(0); const float output_min = GetOutput(1)->flat()(0); diff --git a/tensorflow/core/kernels/ragged_range_op.cc b/tensorflow/core/kernels/ragged_range_op.cc index 066e5d638bbc43..469ef06b4b3bb6 100644 --- a/tensorflow/core/kernels/ragged_range_op.cc +++ b/tensorflow/core/kernels/ragged_range_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include #include @@ -78,8 +79,25 @@ class RaggedRangeOp : public OpKernel { T limit = broadcast_limits ? limits(0) : limits(row); T delta = broadcast_deltas ? deltas(0) : deltas(row); OP_REQUIRES(context, delta != 0, InvalidArgument("Requires delta != 0")); - rt_nested_splits(row + 1) = - rt_nested_splits(row) + RangeSize(start, limit, delta); + int64_t size; // The number of elements in the specified range. + if (((delta > 0) && (limit < start)) || + ((delta < 0) && (limit > start))) { + size = 0; + } else if (std::is_integral::value) { + // The following is copied from tensorflow::RangeOp::Compute(). + size = Eigen::divup(Eigen::numext::abs(limit - start), + Eigen::numext::abs(delta)); + } else { + // The following is copied from tensorflow::RangeOp::Compute(). + auto size_auto = + Eigen::numext::ceil(Eigen::numext::abs((limit - start) / delta)); + OP_REQUIRES( + context, size_auto <= std::numeric_limits::max(), + errors::InvalidArgument("Requires ((limit - start) / delta) <= ", + std::numeric_limits::max())); + size = static_cast(size_auto); + } + rt_nested_splits(row + 1) = rt_nested_splits(row) + size; } SPLITS_TYPE nvals = rt_nested_splits(nrows); @@ -99,19 +117,6 @@ class RaggedRangeOp : public OpKernel { } } } - - private: - // Returns the number of elements in the specified range. - SPLITS_TYPE RangeSize(T start, T limit, T delta) { - if (((delta > 0) && (limit < start)) || ((delta < 0) && (limit > start))) { - return 0; - } - // The following is copied from tensorflow::RangeOp::Compute(). - return (std::is_integral::value - ? 
((std::abs(limit - start) + std::abs(delta) - 1) / - std::abs(delta)) - : std::ceil(std::abs((limit - start) / delta))); - } }; #define REGISTER_CPU_KERNEL(TYPE) \ diff --git a/tensorflow/core/kernels/ragged_range_op_test.cc b/tensorflow/core/kernels/ragged_range_op_test.cc index 94aaedde3420e0..fc3b302eeb7e30 100644 --- a/tensorflow/core/kernels/ragged_range_op_test.cc +++ b/tensorflow/core/kernels/ragged_range_op_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/shape_inference.h" @@ -77,6 +78,17 @@ TEST_F(RaggedRangeOpTest, FloatValues) { test::AsTensor({0, 2, 4, 6, 5, 6, 5, 4, 3, 2}), 0.1); } +TEST_F(RaggedRangeOpTest, RangeSizeOverflow) { + BuildRaggedRangeGraph(); + AddInputFromArray(TensorShape({2}), {1.1, 0.1}); // starts + AddInputFromArray(TensorShape({2}), {10.0, 1e10}); // limits + AddInputFromArray(TensorShape({2}), {1, 1e-10}); // deltas + + EXPECT_EQ(absl::StrCat("Requires ((limit - start) / delta) <= ", + std::numeric_limits::max()), + RunOpKernel().error_message()); +} + TEST_F(RaggedRangeOpTest, BroadcastDeltas) { BuildRaggedRangeGraph(); AddInputFromArray(TensorShape({3}), {0, 5, 8}); // starts diff --git a/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc b/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc index 6975baa5bcff59..0883bc28893488 100644 --- a/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc +++ b/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc @@ -145,6 +145,10 @@ class RaggedTensorToVariantOp : public OpKernel { batched_ragged_input.mutable_nested_splits()->reserve( ragged_nested_splits_len); for (int i = 0; i < ragged_nested_splits_len; i++) { + OP_REQUIRES(context, ragged_nested_splits_in[i].dims() == 1, + errors::InvalidArgument("Requires nested_row_splits[", i, "]", + " to be rank 1 but is rank ", + ragged_nested_splits_in[i].dims())); batched_ragged_input.append_splits(ragged_nested_splits_in[i]); } diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index 8ec9ed7d24b081..b85c120eba9aa8 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -166,7 +166,7 @@ class RandomGammaOp : public OpKernel { } const int64_t samples_per_alpha = samples_shape.num_elements(); - samples_shape.AppendShape(alpha_t.shape()); + OP_REQUIRES_OK(ctx, samples_shape.AppendShapeWithStatus(alpha_t.shape())); // Allocate output samples. Tensor* samples_t = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, samples_shape, &samples_t)); diff --git a/tensorflow/core/kernels/random_poisson_op.cc b/tensorflow/core/kernels/random_poisson_op.cc index b4c4d5d95c1881..a14bee790753ca 100644 --- a/tensorflow/core/kernels/random_poisson_op.cc +++ b/tensorflow/core/kernels/random_poisson_op.cc @@ -296,8 +296,8 @@ class RandomPoissonOp : public OpKernel { TensorShape samples_shape; OP_REQUIRES_OK(ctx, tensor::MakeShape(shape_t, &samples_shape)); const int64_t num_samples = samples_shape.num_elements(); + OP_REQUIRES_OK(ctx, samples_shape.AppendShapeWithStatus(rate_t.shape())); - samples_shape.AppendShape(rate_t.shape()); // Allocate output samples. 
Tensor* samples_t = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, samples_shape, &samples_t)); diff --git a/tensorflow/core/kernels/requantize.cc b/tensorflow/core/kernels/requantize.cc index 3259e5ddd096aa..bc5de171639267 100644 --- a/tensorflow/core/kernels/requantize.cc +++ b/tensorflow/core/kernels/requantize.cc @@ -18,9 +18,11 @@ limitations under the License. #define EIGEN_USE_THREADS #include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/type_traits.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/meta_support.h" @@ -38,10 +40,34 @@ class RequantizeOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); - const float input_min_float = ctx->input(1).flat()(0); - const float input_max_float = ctx->input(2).flat()(0); - const float requested_output_min_float = ctx->input(3).flat()(0); - const float requested_output_max_float = ctx->input(4).flat()(0); + + const Tensor& input_min = ctx->input(1); + const Tensor& input_max = ctx->input(2); + const Tensor& requested_output_min = ctx->input(3); + const Tensor& requested_output_max = ctx->input(4); + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_min.shape()), + errors::InvalidArgument("`input_min` must be rank 0 but is rank ", + input_min.dims())); + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_max.shape()), + errors::InvalidArgument("`input_max` must be rank 0 but is rank ", + input_max.dims())); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(requested_output_min.shape()), + errors::InvalidArgument( + "`requested_output_min` must be rank 0 but is rank ", + requested_output_min.dims())); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(requested_output_max.shape()), + errors::InvalidArgument( + "`requested_output_max` must be rank 0 but is rank ", + requested_output_max.dims())); + + const float input_min_float = input_min.flat()(0); + const float input_max_float = input_max.flat()(0); + const float requested_output_min_float = + requested_output_min.flat()(0); + const float requested_output_max_float = + requested_output_max.flat()(0); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); diff --git a/tensorflow/core/kernels/requantize_op_test.cc b/tensorflow/core/kernels/requantize_op_test.cc index 133f92b029d8da..5663520fdbfda9 100644 --- a/tensorflow/core/kernels/requantize_op_test.cc +++ b/tensorflow/core/kernels/requantize_op_test.cc @@ -53,10 +53,10 @@ TEST_F(RequantizeTest, HandCraftedRequantize) { // Requantize to -1 to 1. 
AddInputFromArray(TensorShape({value_count}), {-(1 << 23), 0, (1 << 23)}); - AddInputFromArray(TensorShape({1}), {-256.0f}); - AddInputFromArray(TensorShape({1}), {256.0f}); - AddInputFromArray(TensorShape({1}), {-1.0f}); - AddInputFromArray(TensorShape({1}), {1.0f}); + AddInputFromArray(TensorShape({}), {-256.0f}); + AddInputFromArray(TensorShape({}), {256.0f}); + AddInputFromArray(TensorShape({}), {-1.0f}); + AddInputFromArray(TensorShape({}), {1.0f}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_QUINT8, TensorShape({value_count})); test::FillValues(&expected, {0, 128, 255}); @@ -71,10 +71,10 @@ TEST_F(RequantizeTest, InvalidOutputMin) { AddInputFromArray(TensorShape({value_count}), {-(1 << 23), 0, (1 << 23)}); - AddInputFromArray(TensorShape({1}), {-256.0f}); - AddInputFromArray(TensorShape({1}), {256.0f}); - AddInputFromArray(TensorShape({1}), {0.01f}); - AddInputFromArray(TensorShape({1}), {1.0f}); + AddInputFromArray(TensorShape({}), {-256.0f}); + AddInputFromArray(TensorShape({}), {256.0f}); + AddInputFromArray(TensorShape({}), {0.01f}); + AddInputFromArray(TensorShape({}), {1.0f}); EXPECT_EQ("requested_output_min must be <= 0, but got 0.01", RunOpKernel().error_message()); } @@ -85,10 +85,10 @@ TEST_F(RequantizeTest, InvalidOutputMax) { AddInputFromArray(TensorShape({value_count}), {-(1 << 23), 0, (1 << 23)}); - AddInputFromArray(TensorShape({1}), {-256.0f}); - AddInputFromArray(TensorShape({1}), {256.0f}); - AddInputFromArray(TensorShape({1}), {-10.0f}); - AddInputFromArray(TensorShape({1}), {-11.0f}); + AddInputFromArray(TensorShape({}), {-256.0f}); + AddInputFromArray(TensorShape({}), {256.0f}); + AddInputFromArray(TensorShape({}), {-10.0f}); + AddInputFromArray(TensorShape({}), {-11.0f}); EXPECT_EQ( "requested_output_max must be >= requested_output_min, but got -11 and " "-10", diff --git a/tensorflow/core/kernels/reshape_op.h b/tensorflow/core/kernels/reshape_op.h index cd8ffefdff2274..9f6dd2c156cea2 100644 --- a/tensorflow/core/kernels/reshape_op.h +++ b/tensorflow/core/kernels/reshape_op.h @@ -45,6 +45,11 @@ class ReshapeOp : public OpKernel { TensorShapeUtils::IsScalar(sizes.shape())), errors::InvalidArgument("sizes input must be 1-D, not ", sizes.shape().DebugString())); + OP_REQUIRES( + context, sizes.NumElements() < TensorShape::MaxDimensions(), + errors::InvalidArgument("too many dimensions: must be < ", + TensorShape::MaxDimensions(), ", but received ", + sizes.NumElements())); // Compute the output shape. Determine product of specified // dimensions, and find the index of the unspecified one. diff --git a/tensorflow/core/kernels/rnn/lstm_ops.cc b/tensorflow/core/kernels/rnn/lstm_ops.cc index 711fc8f08275d8..b0a27c1914af74 100644 --- a/tensorflow/core/kernels/rnn/lstm_ops.cc +++ b/tensorflow/core/kernels/rnn/lstm_ops.cc @@ -416,6 +416,65 @@ class LSTMBlockCellOp : public OpKernel { const Device& device = ctx->eigen_device(); + // Sanity check that each of the tensors have the required NDIMS. 
+ OP_REQUIRES(ctx, x_tensor->dims() == 2, + errors::InvalidArgument("x_tensor must be rank 2 but is rank ", + x_tensor->dims(), ".")); + OP_REQUIRES( + ctx, cs_prev_tensor->dims() == 2, + errors::InvalidArgument("cs_prev_tensor must be rank 2 but is rank ", + cs_prev_tensor->dims(), ".")); + OP_REQUIRES( + ctx, h_prev_tensor->dims() == 2, + errors::InvalidArgument("h_prev_tensor must be rank 2 but is rank ", + h_prev_tensor->dims(), ".")); + OP_REQUIRES(ctx, w_tensor->dims() == 2, + errors::InvalidArgument("w_tensor must be rank 2 but is rank ", + w_tensor->dims(), ".")); + OP_REQUIRES( + ctx, wci_tensor->dims() == 1, + errors::InvalidArgument("wci_tensor must be rank 1 but is rank ", + wci_tensor->dims(), ".")); + OP_REQUIRES( + ctx, wcf_tensor->dims() == 1, + errors::InvalidArgument("wcf_tensor must be rank 1 but is rank ", + wci_tensor->dims(), ".")); + OP_REQUIRES( + ctx, wco_tensor->dims() == 1, + errors::InvalidArgument("wco_tensor must be rank 1 but is rank ", + wco_tensor->dims(), ".")); + OP_REQUIRES(ctx, b_tensor->dims() == 1, + errors::InvalidArgument("b_tensor must be rank 1 but is rank ", + b_tensor->dims(), ".")); + OP_REQUIRES(ctx, xh_tensor.dims() == 2, + errors::InvalidArgument("xh_tensor must be rank 2 but is rank ", + xh_tensor.dims(), ".")); + OP_REQUIRES(ctx, i_tensor->dims() == 2, + errors::InvalidArgument("i_tensor must be rank 2 but is rank ", + i_tensor->dims(), ".")); + OP_REQUIRES(ctx, cs_tensor->dims() == 2, + errors::InvalidArgument("cs_tensor must be rank 2 but is rank ", + cs_tensor->dims(), ".")); + OP_REQUIRES(ctx, f_tensor->dims() == 2, + errors::InvalidArgument("f_tensor must be rank 2 but is rank ", + f_tensor->dims(), ".")); + OP_REQUIRES(ctx, o_tensor->dims() == 2, + errors::InvalidArgument("o_tensor must be rank 2 but is rank ", + o_tensor->dims(), ".")); + OP_REQUIRES(ctx, ci_tensor->dims() == 2, + errors::InvalidArgument("ci_tensor must be rank 2 but is rank ", + ci_tensor->dims(), ".")); + OP_REQUIRES(ctx, co_tensor->dims() == 2, + errors::InvalidArgument("co_tensor must be rank 2 but is rank ", + co_tensor->dims(), ".")); + OP_REQUIRES( + ctx, gates_tensor.dims() == 2, + errors::InvalidArgument("gates_tensor must be rank 2 but is rank ", + gates_tensor.dims(), ".")); + OP_REQUIRES(ctx, h_tensor->dims() == 2, + errors::InvalidArgument("h_tensor must be rank 2 but is rank ", + h_tensor->dims(), ".")); + functor::LSTMBlockCellFprop( batch_size, input_size, cell_size)( ctx, device, forget_bias_, cell_clip_, use_peephole_, @@ -1079,19 +1138,30 @@ class BlockLSTMGradOp : public OpKernel { const Tensor* x; OP_REQUIRES_OK(ctx, ctx->input("x", &x)); - OP_REQUIRES(ctx, x->dims() == 3, errors::InvalidArgument("x must be 3D")); + OP_REQUIRES( + ctx, x->dims() == 3, + errors::InvalidArgument("x must be rank 3 but is rank ", x->dims())); const int64_t timelen = x->dim_size(0); const int64_t batch_size = x->dim_size(1); const int64_t input_size = x->dim_size(2); const Tensor* cs_prev_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("cs_prev", &cs_prev_tensor)); + OP_REQUIRES(ctx, cs_prev_tensor->dims() == 2, + errors::InvalidArgument("cs_prev must be rank 2 but is rank ", + cs_prev_tensor->dims())); const Tensor* h_prev_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("h_prev", &h_prev_tensor)); + OP_REQUIRES(ctx, h_prev_tensor->dims() == 2, + errors::InvalidArgument("h_prev must be rank 2 but is rank ", + h_prev_tensor->dims())); const Tensor* w_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("w", &w_tensor)); + OP_REQUIRES(ctx, w_tensor->dims() == 2, + 
errors::InvalidArgument("w must be rank 2 but is rank ", + w_tensor->dims())); const int64_t cell_size = w_tensor->dim_size(1) / 4; OP_REQUIRES(ctx, input_size + cell_size == w_tensor->dim_size(0), errors::InvalidArgument( @@ -1100,15 +1170,27 @@ class BlockLSTMGradOp : public OpKernel { const Tensor* wci_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("wci", &wci_tensor)); + OP_REQUIRES(ctx, wci_tensor->dims() == 1, + errors::InvalidArgument("wci must be rank 1 but is rank ", + wci_tensor->dims())); const Tensor* wcf_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("wcf", &wcf_tensor)); + OP_REQUIRES(ctx, wcf_tensor->dims() == 1, + errors::InvalidArgument("wcf must be rank 1 but is rank ", + wcf_tensor->dims())); const Tensor* wco_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("wco", &wco_tensor)); + OP_REQUIRES(ctx, wco_tensor->dims() == 1, + errors::InvalidArgument("wco must be rank 1 but is rank ", + wco_tensor->dims())); const Tensor* b_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("b", &b_tensor)); + OP_REQUIRES(ctx, b_tensor->dims() == 1, + errors::InvalidArgument("b must be rank 1 but is rank ", + b_tensor->dims())); OP_REQUIRES( ctx, cell_size == b_tensor->dim_size(0) / 4, errors::InvalidArgument("w and b cell_size don't match: ", cell_size, diff --git a/tensorflow/core/kernels/sdca_internal.cc b/tensorflow/core/kernels/sdca_internal.cc index 58d83f6936a8a0..b2a9bc630af6e1 100644 --- a/tensorflow/core/kernels/sdca_internal.cc +++ b/tensorflow/core/kernels/sdca_internal.cc @@ -389,6 +389,13 @@ Status Examples::Initialize(OpKernelContext* const context, OpInputList dense_features_inputs; TF_RETURN_IF_ERROR( context->input_list("dense_features", &dense_features_inputs)); + for (int i = 0; i < dense_features_inputs.size(); ++i) { + if (!TensorShapeUtils::IsMatrix(dense_features_inputs[i].shape())) { + return errors::InvalidArgument("Dense features at index ", i, + " must be rank 2 but is rank ", + dense_features_inputs[i].dims()); + } + } examples_.clear(); examples_.resize(num_examples); diff --git a/tensorflow/core/kernels/sdca_ops.cc b/tensorflow/core/kernels/sdca_ops.cc index 98b4fd1c82b239..d279eda86e741c 100644 --- a/tensorflow/core/kernels/sdca_ops.cc +++ b/tensorflow/core/kernels/sdca_ops.cc @@ -49,6 +49,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" @@ -142,6 +143,10 @@ void DoCompute(const ComputeOptions& options, OpKernelContext* const context) { const Tensor* example_state_data_t; OP_REQUIRES_OK(context, context->input("example_state_data", &example_state_data_t)); + OP_REQUIRES( + context, TensorShapeUtils::IsMatrix(example_state_data_t->shape()), + errors::InvalidArgument("example_state_data must be rank 2 but is rank ", + example_state_data_t->dims())); TensorShape expected_example_state_shape({examples.num_examples(), 4}); OP_REQUIRES(context, example_state_data_t->shape() == expected_example_state_shape, diff --git a/tensorflow/core/kernels/searchsorted_op.cc b/tensorflow/core/kernels/searchsorted_op.cc index 019e704738f7dd..5c06ce1e4bfd43 100644 --- a/tensorflow/core/kernels/searchsorted_op.cc +++ b/tensorflow/core/kernels/searchsorted_op.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/fill_functor.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -115,6 +116,14 @@ class UpperBoundOp : public OpKernel { auto output = output_t->template flat(); const auto sorted_inputs = sorted_inputs_t.template flat(); const auto values = values_t.template flat(); + + // For empty inputs, all values will be placed at the zeroth position. + if (sorted_inputs.size() == 0) { + functor::SetZeroFunctor set_zero; + set_zero(ctx->eigen_device(), output); + return; + } + OP_REQUIRES_OK( ctx, functor::UpperBoundFunctor::Compute( ctx, sorted_inputs, values, sorted_inputs_t.dim_size(0), @@ -160,6 +169,14 @@ class LowerBoundOp : public OpKernel { auto output = output_t->template flat(); const auto sorted_inputs = sorted_inputs_t.template flat(); const auto values = values_t.template flat(); + + // For empty inputs, all values will be placed at the zeroth position. + if (sorted_inputs.size() == 0) { + functor::SetZeroFunctor set_zero; + set_zero(ctx->eigen_device(), output); + return; + } + OP_REQUIRES_OK( ctx, functor::LowerBoundFunctor::Compute( ctx, sorted_inputs, values, sorted_inputs_t.dim_size(0), diff --git a/tensorflow/core/kernels/session_ops.cc b/tensorflow/core/kernels/session_ops.cc index 11f991e1ba34fe..2feb79a24e329e 100644 --- a/tensorflow/core/kernels/session_ops.cc +++ b/tensorflow/core/kernels/session_ops.cc @@ -98,6 +98,8 @@ class GetSessionTensorOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& handle = ctx->input(0); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(handle.shape()), + errors::InvalidArgument("handle must be scalar")); const string& name = handle.scalar()(); Tensor val; auto session_state = ctx->session_state(); @@ -132,6 +134,8 @@ class DeleteSessionTensorOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& handle = ctx->input(0); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(handle.shape()), + errors::InvalidArgument("`handle` must be scalar")); const string& name = handle.scalar()(); auto session_state = ctx->session_state(); OP_REQUIRES(ctx, session_state != nullptr, diff --git a/tensorflow/core/kernels/set_kernels.cc b/tensorflow/core/kernels/set_kernels.cc index 74b5b543b7c7e3..0e610ac4136a56 100644 --- a/tensorflow/core/kernels/set_kernels.cc +++ b/tensorflow/core/kernels/set_kernels.cc @@ -70,8 +70,12 @@ Status SparseTensorFromContext(OpKernelContext* ctx, const int32_t base_index, sparse::SparseTensor* tensor) { // Assume row-major order. TensorShape shape; - TF_RETURN_IF_ERROR(TensorShape::BuildTensorShape( - ctx->input(base_index + 2).vec(), &shape)); + const Tensor& shape_tensor = ctx->input(base_index + 2); + if (shape_tensor.dims() != 1) { + return errors::InvalidArgument("Shape must be a 1D tensor."); + } + TF_RETURN_IF_ERROR( + TensorShape::BuildTensorShape(shape_tensor.vec(), &shape)); CheckRankAtLeast2(ctx, shape); std::vector order(shape.dims()); std::iota(order.begin(), order.end(), 0); diff --git a/tensorflow/core/kernels/sobol_op.cc b/tensorflow/core/kernels/sobol_op.cc index 94fff6baea2688..484bac99463300 100644 --- a/tensorflow/core/kernels/sobol_op.cc +++ b/tensorflow/core/kernels/sobol_op.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "sobol_data.h" // from @sobol_data #include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/platform_strings.h" @@ -134,8 +135,14 @@ class SobolSampleOp : public OpKernel { : OpKernel(context) {} void Compute(OpKernelContext* context) override { + OP_REQUIRES(context, TensorShapeUtils::IsScalar(context->input(0).shape()), + errors::InvalidArgument("dim must be a scalar")); int32_t dim = context->input(0).scalar()(); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(context->input(1).shape()), + errors::InvalidArgument("num_results must be a scalar")); int32_t num_results = context->input(1).scalar()(); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(context->input(2).shape()), + errors::InvalidArgument("skip must be a scalar")); int32_t skip = context->input(2).scalar()(); OP_REQUIRES(context, dim >= 1, diff --git a/tensorflow/core/kernels/spacetobatch_op.cc b/tensorflow/core/kernels/spacetobatch_op.cc index 009500f79268fc..e391529d852b04 100644 --- a/tensorflow/core/kernels/spacetobatch_op.cc +++ b/tensorflow/core/kernels/spacetobatch_op.cc @@ -21,8 +21,6 @@ limitations under the License. #include #include -#include "tensorflow/core/kernels/spacetobatch_functor.h" - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -31,8 +29,10 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/spacetobatch_functor.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/overflow.h" namespace tensorflow { @@ -99,7 +99,13 @@ Status SpaceToBatchOpCompute(OpKernelContext* context, // Compute the product of the block_shape values. int64_t block_shape_product = 1; for (int block_dim = 0; block_dim < block_dims; ++block_dim) { - block_shape_product *= block_shape[block_dim]; + if (block_shape[block_dim] < 1) { + return errors::InvalidArgument( + "All values in block_shape must be positive, got value, ", + block_shape[block_dim], " at index ", block_dim, "."); + } + block_shape_product = + MultiplyWithoutOverflow(block_shape_product, block_shape[block_dim]); } if (block_shape_product <= 0) { return errors::InvalidArgument( @@ -131,8 +137,14 @@ Status SpaceToBatchOpCompute(OpKernelContext* context, // The actual output shape exposed to callers. 
TensorShape external_output_shape; - external_output_shape.AddDim(orig_input_tensor.dim_size(0) * - block_shape_product); + const int64_t output_shape = MultiplyWithoutOverflow( + orig_input_tensor.dim_size(0), block_shape_product); + if (output_shape < 0) { + return errors::InvalidArgument( + "Negative output dimension size caused by overflow when multiplying ", + orig_input_tensor.dim_size(0), " and ", block_shape_product); + } + external_output_shape.AddDim(output_shape); int64_t input_batch_size = orig_input_tensor.dim_size(0); for (int block_dim = 0; block_dim < removed_prefix_block_dims; ++block_dim) { diff --git a/tensorflow/core/kernels/sparse/sparse_matrix.h b/tensorflow/core/kernels/sparse/sparse_matrix.h index 3476aa48f5d052..5e70b07ff8a7ed 100644 --- a/tensorflow/core/kernels/sparse/sparse_matrix.h +++ b/tensorflow/core/kernels/sparse/sparse_matrix.h @@ -25,10 +25,12 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_encode_decode.h" #include "tensorflow/core/framework/variant_op_registry.h" +#include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -633,6 +635,11 @@ template Status ExtractVariantFromInput(OpKernelContext* ctx, int index, const T** value) { const Tensor& input_t = ctx->input(index); + if (!TensorShapeUtils::IsScalar(input_t.shape())) { + return errors::InvalidArgument( + "Invalid input matrix: Shape must be rank 0 but is rank ", + input_t.dims()); + } const Variant& input_variant = input_t.scalar()(); *value = input_variant.get(); if (*value == nullptr) { diff --git a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc index b18101a8a174b2..e50b201e553a79 100644 --- a/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc +++ b/tensorflow/core/kernels/sparse/sparse_tensor_to_csr_sparse_matrix_op.cc @@ -67,6 +67,13 @@ class SparseTensorToCSRSparseMatrixCPUOp : public OpKernel { const Tensor& values = ctx->input(1); const Tensor& dense_shape = ctx->input(2); const int rank = dense_shape.NumElements(); + OP_REQUIRES( + ctx, TensorShapeUtils::IsVector(dense_shape.shape()), + errors::InvalidArgument("dense_shape must be rank 1 but got rank", + dense_shape.shape().dims())); + OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(indices.shape()), + errors::InvalidArgument("indices must be rank 2 but got rank", + indices.shape().dims())); OP_REQUIRES(ctx, rank == 2 || rank == 3, errors::InvalidArgument("SparseTensor must have rank 2 or 3; ", "but indices has rank: ", rank)); diff --git a/tensorflow/core/kernels/sparse_cross_op.cc b/tensorflow/core/kernels/sparse_cross_op.cc index 09c503e2ec216b..f4fa54a670e22c 100644 --- a/tensorflow/core/kernels/sparse_cross_op.cc +++ b/tensorflow/core/kernels/sparse_cross_op.cc @@ -24,12 +24,14 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/strong_hash.h" #include "tensorflow/core/util/work_sharder.h" @@ -832,6 +834,10 @@ class SparseCrossV2Op : public OpKernel { const Tensor* sep_t; OP_REQUIRES_OK(context, context->input("sep", &sep_t)); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(sep_t->shape()), + errors::InvalidArgument("Input separator should be a scalar. " + "Received: ", + sep_t->DebugString())); const tstring separator = sep_t->scalar()(); std::vector>> columns = diff --git a/tensorflow/core/kernels/sparse_fill_empty_rows_op_gpu.cu.cc b/tensorflow/core/kernels/sparse_fill_empty_rows_op_gpu.cu.cc index 05b734dea231b8..f5613fc6d55e7f 100644 --- a/tensorflow/core/kernels/sparse_fill_empty_rows_op_gpu.cu.cc +++ b/tensorflow/core/kernels/sparse_fill_empty_rows_op_gpu.cu.cc @@ -297,9 +297,12 @@ struct SparseFillEmptyRows { empty_row_indicator = empty_row_indicator_t.vec().data(); } - TF_RETURN_IF_ERROR(wrap_kernel_call(ComputeEmptyRowIndicatorKernel, - /*device=*/device, /*size=*/dense_rows, - elements_per_row, empty_row_indicator)); + if (dense_rows > 0) { + TF_RETURN_IF_ERROR( + wrap_kernel_call(ComputeEmptyRowIndicatorKernel, + /*device=*/device, /*size=*/dense_rows, + elements_per_row, empty_row_indicator)); + } // For each row, the number of empty rows up to and including that row. Tensor num_empty_rows_through_t; @@ -405,14 +408,16 @@ struct SparseFillEmptyRows { done); } - OP_REQUIRES_OK_ASYNC( - context, - wrap_kernel_call(ScatterNewElementsKernel, - /*device=*/device, /*size=*/dense_rows, rank, - default_value, num_empty_rows_through, - input_row_ends, empty_row_indicator, output_indices, - output_values), - done); + if (dense_rows > 0) { + OP_REQUIRES_OK_ASYNC( + context, + wrap_kernel_call(ScatterNewElementsKernel, + /*device=*/device, /*size=*/dense_rows, rank, + default_value, num_empty_rows_through, + input_row_ends, empty_row_indicator, + output_indices, output_values), + done); + } done(); }; @@ -460,9 +465,11 @@ struct SparseFillEmptyRows { TF_RETURN_IF_ERROR( context->allocate_temp(index_type, TensorShape({N}), &row_indices_t)); auto row_indices = row_indices_t.flat(); - TF_RETURN_IF_ERROR(wrap_kernel_call(CopyRowIndicesKernel, - /*device=*/device, /*size=*/N, rank, - indices, row_indices)); + if (N > 0) { + TF_RETURN_IF_ERROR(wrap_kernel_call(CopyRowIndicesKernel, + /*device=*/device, /*size=*/N, rank, + indices, row_indices)); + } // Allocate input_index_map. 
TF_RETURN_IF_ERROR(context->allocate_temp(index_type, TensorShape({N}), input_index_map_t)); @@ -527,9 +534,11 @@ struct SparseFillEmptyRowsGrad { auto visited = visited_t.vec(); visited.device(device) = visited.constant(false); - TF_RETURN_IF_ERROR(wrap_kernel_call( - GatherOriginalGradValuesKernel, /*device=*/device, - /*size=*/N, reverse_index_map, grad_values, d_values, visited)); + if (N > 0) { + TF_RETURN_IF_ERROR(wrap_kernel_call( + GatherOriginalGradValuesKernel, /*device=*/device, + /*size=*/N, reverse_index_map, grad_values, d_values, visited)); + } // Now we mask out the visited values and sum the remaining ones (which // correspond to the empty rows in the forward input) to compute diff --git a/tensorflow/core/kernels/sparse_tensor_dense_add_op.cc b/tensorflow/core/kernels/sparse_tensor_dense_add_op.cc index 48803e4b939800..6d6b05bf70f30a 100644 --- a/tensorflow/core/kernels/sparse_tensor_dense_add_op.cc +++ b/tensorflow/core/kernels/sparse_tensor_dense_add_op.cc @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/core/kernels/sparse_tensor_dense_add_op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_util.h" @@ -47,6 +48,17 @@ Status ValidateInputs(const Tensor *a_indices, const Tensor *a_values, a_values->shape().DebugString(), " and ", a_shape->shape().DebugString()); } + int64_t nnz = a_indices->dim_size(0); + int64_t ndims = a_indices->dim_size(1); + if (a_values->dim_size(0) != nnz) { + return errors::InvalidArgument("Dimensions ", nnz, " and ", + a_values->dim_size(0), + " are not compatible"); + } + if (a_shape->dim_size(0) != ndims) { + return errors::InvalidArgument("Dimensions ", ndims, " and ", + a_shape->dim_size(0), " are not compatible"); + } if (a_shape->NumElements() != b->dims()) { return errors::InvalidArgument( "Two operands have different ranks; received: ", a_shape->NumElements(), @@ -61,6 +73,24 @@ Status ValidateInputs(const Tensor *a_indices, const Tensor *a_values, a_shape_flat(i), " vs dense side ", b->dim_size(i)); } } + + // Check for invalid indices. + const auto a_indices_mat = a_indices->flat_inner_dims(); + + for (int64_t zidx = 0; zidx < nnz; ++zidx) { + for (int64_t didx = 0; didx < ndims; ++didx) { + const Index idx = a_indices_mat(zidx, didx); + if (idx < 0 || idx >= a_shape_flat(didx)) { + return errors::InvalidArgument( + "Sparse tensor has an invalid index on dimension ", didx, + ": " + "a_indices(", + zidx, ",", didx, ") = ", idx, + ", dense tensor shape: ", a_shape_flat); + } + } + } + return Status::OK(); } diff --git a/tensorflow/core/kernels/sparse_utils.cc b/tensorflow/core/kernels/sparse_utils.cc index 75e42eebe25bef..ff340f81eb44e8 100644 --- a/tensorflow/core/kernels/sparse_utils.cc +++ b/tensorflow/core/kernels/sparse_utils.cc @@ -16,8 +16,12 @@ limitations under the License. #include "tensorflow/core/kernels/sparse_utils.h" #include +#include #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/status.h" namespace tensorflow { namespace sparse_utils { @@ -140,6 +144,165 @@ bool ContainsEmptyRows(const std::vector& row_start_indices) { return false; } +namespace { + +// Ensures indices, values, shape are all of the proper ranks and are +// compatible. 
+Status ValidateSparseTensorShape(const Tensor& indices, const Tensor& values, + const Tensor& shape) { + // Indices must be a matrix, and values/shape must be a vector. + if (!TensorShapeUtils::IsMatrix(indices.shape())) { + return errors::InvalidArgument("Sparse indices must be rank 2 but is rank ", + indices.shape().dim_sizes().size()); + } + if (!TensorShapeUtils::IsVector(values.shape())) { + return errors::InvalidArgument("Sparse values must be rank 1 but is rank ", + values.shape().dims()); + } + if (!TensorShapeUtils::IsVector(shape.shape())) { + return errors::InvalidArgument("Sparse shape must be rank 1 but is rank ", + shape.shape().dims()); + } + // Indices shape must be compatible with the values vector and dense shape. + int64_t nnz = indices.dim_size(0); + int64_t ndims = indices.dim_size(1); + if (values.dim_size(0) != nnz) { + return errors::InvalidArgument("Number of elements in indices (", nnz, + ") and values (", values.dim_size(0), + ") do not match"); + } + if (shape.NumElements() != ndims) { + return errors::InvalidArgument("Index rank (", ndims, ") and shape rank (", + shape.NumElements(), ") do not match"); + } + + return Status::OK(); +} + +// Creates a debug string for the index tuple in indices(row, :). +template +string CreateIndexString(const IndexTensor& indices, int64_t row) { + const int64_t ndims = indices.dimension(1); + string index_str = strings::StrCat("indices[", row, ", :] = ["); + for (int64_t dim = 0; dim < ndims; ++dim) { + strings::StrAppend(&index_str, indices(row, dim), + dim < ndims - 1 ? ", " : "]"); + } + if (ndims == 0) { + strings::StrAppend(&index_str, "]"); + } + return index_str; +} + +// Ensures all sparse indices are within correct bounds. +template +Status ValidateSparseTensorIndicesUnordered(const Tensor& indices, + const Tensor& shape) { + // Ensure no index is out-of-bounds. + const auto indices_mat = indices.flat_inner_dims(); + const auto shape_vec = shape.flat(); + int64_t nnz = indices.dim_size(0); + int64_t ndims = indices.dim_size(1); + + for (int64_t i = 0; i < nnz; ++i) { + for (int64_t dim = 0; dim < ndims; ++dim) { + const Tindices idx = indices_mat(i, dim); + if (TF_PREDICT_FALSE(idx < 0 || idx >= shape_vec(dim))) { + string index_str = CreateIndexString(indices_mat, i); + return errors::InvalidArgument("Sparse index tuple ", index_str, + " is out of bounds"); + } + } + } + + return Status::OK(); +} + +// Ensures all sparse indices are within correct bounds and are +// lexicographically ordered. +template +Status ValidateSparseTensorIndicesOrdered(const Tensor& indices, + const Tensor& shape) { + const auto indices_mat = indices.flat_inner_dims(); + const auto shape_vec = shape.flat(); + int64_t nnz = indices.dim_size(0); + int64_t ndims = indices.dim_size(1); + + if (nnz == 0) { + return Status::OK(); + } + + // First set of indices must be within range. + for (int64_t dim = 0; dim < ndims; ++dim) { + const Tindices idx = indices_mat(0, dim); + if (TF_PREDICT_FALSE(idx < 0 || idx >= shape_vec(dim))) { + string index_str = CreateIndexString(indices_mat, 0); + return errors::InvalidArgument("Sparse index tuple ", index_str, + " is out of bounds"); + } + } + + // Remaining set of indices must be within range and lexicographically + // larger than the previous. 
+ for (int64_t i = 1; i < nnz; ++i) { + bool different = false; + for (int64_t dim = 0; dim < ndims; ++dim) { + const Tindices idx = indices_mat(i, dim); + const Tindices prev_idx = indices_mat(i - 1, dim); + // If indices are already different from previous i, the new index can + // be anything within the valid range. + if (TF_PREDICT_TRUE(different)) { + if (TF_PREDICT_FALSE(idx < 0 || idx >= shape_vec(dim))) { + string index_str = CreateIndexString(indices_mat, i); + return errors::InvalidArgument("Sparse index tuple ", index_str, + " is out of bounds"); + } + } else { + // Otherwise, the new index must be >= previous and <= shape(dim). + if (TF_PREDICT_FALSE(idx < prev_idx || idx >= shape_vec(dim))) { + string index_str = CreateIndexString(indices_mat, i); + // Check if index is actually out of bounds. + if (TF_PREDICT_FALSE(idx < 0 || idx >= shape_vec(dim))) { + return errors::InvalidArgument("Sparse index tuple ", index_str, + " is out of bounds"); + } else { + return errors::InvalidArgument("Sparse index tuple ", index_str, + " is out of order"); + } + } else if (TF_PREDICT_TRUE(idx > prev_idx)) { + different = true; + } + } // if (different) + } // for dim in [0, ndims) + + if (TF_PREDICT_FALSE(!different)) { + string index_str = CreateIndexString(indices_mat, i); + return errors::InvalidArgument("Sparse index tuple ", index_str, + " is repeated"); + } + } // for i in [1, nnz) + + return Status::OK(); +} + +} // namespace + +template +Status ValidateSparseTensor(const Tensor& indices, const Tensor& values, + const Tensor& shape, + IndexValidation index_validation) { + TF_RETURN_IF_ERROR(ValidateSparseTensorShape(indices, values, shape)); + switch (index_validation) { + case IndexValidation::kOrdered: + return ValidateSparseTensorIndicesOrdered(indices, shape); + case IndexValidation::kUnordered: + return ValidateSparseTensorIndicesUnordered(indices, shape); + case IndexValidation::kNone: { + } + } + return Status::OK(); +} + #define REGISTER_SPARSE_UTIL_FUNCTIONS(TypeIndex) \ template TypeIndex FindNextDenseRowStartIndex( \ const TypeIndex sparse_index_begin, \ @@ -151,7 +314,10 @@ bool ContainsEmptyRows(const std::vector& row_start_indices) { const std::vector& row_start_indices); \ template std::vector ParseRowStartIndices( \ const tensorflow::Tensor& tensor, \ - const TypeIndex num_nonzero_entries_in_sparse_mat); + const TypeIndex num_nonzero_entries_in_sparse_mat); \ + template Status ValidateSparseTensor( \ + const Tensor& indices, const Tensor& values, const Tensor& shape, \ + IndexValidation index_validation) REGISTER_SPARSE_UTIL_FUNCTIONS(int32); REGISTER_SPARSE_UTIL_FUNCTIONS(int64); diff --git a/tensorflow/core/kernels/sparse_utils.h b/tensorflow/core/kernels/sparse_utils.h index d43b2e34470a5e..4e6ab744691c28 100644 --- a/tensorflow/core/kernels/sparse_utils.h +++ b/tensorflow/core/kernels/sparse_utils.h @@ -65,6 +65,23 @@ std::vector ParseRowStartIndices( template bool ContainsEmptyRows(const std::vector& row_start_indices); +// Methods for validating sparse indices. +enum class IndexValidation { + kNone, // Indices are not used by the op, or are not directly accessible + // (e.g. on GPU). + kOrdered, // Indices must be unique, in lexicographical order, and within + // safe bounds. + kUnordered // Indices must be within safe bounds, but may repeat or appear + // out-of-order. +}; + +// Validates the three component tensors of a sparse tensor have the proper +// shapes. Also validates index values according to the method supplied. 
+template +Status ValidateSparseTensor(const Tensor& indices, const Tensor& values, + const Tensor& shape, + IndexValidation index_validation); + } // namespace sparse_utils } // namespace tensorflow diff --git a/tensorflow/core/kernels/sparse_utils_test.cc b/tensorflow/core/kernels/sparse_utils_test.cc index c480829237119c..94708480754495 100644 --- a/tensorflow/core/kernels/sparse_utils_test.cc +++ b/tensorflow/core/kernels/sparse_utils_test.cc @@ -15,27 +15,29 @@ limitations under the License. #include "tensorflow/core/kernels/sparse_utils.h" +#include +#include +#include +#include #include +#include "absl/container/flat_hash_set.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/random/philox_random.h" +#include "tensorflow/core/lib/random/simple_philox.h" +#include "tensorflow/core/platform/status_matchers.h" #include "tensorflow/core/platform/test.h" +namespace tensorflow { +namespace sparse_utils { namespace { -using ::int64_t; -using tensorflow::DataType; -using tensorflow::int32; -using tensorflow::Tensor; -using tensorflow::TTypes; -using tensorflow::uint16; -using tensorflow::uint32; -using tensorflow::uint64; -using tensorflow::sparse_utils::ContainsEmptyRows; -using tensorflow::sparse_utils::FindNextDenseRowStartIndex; -using tensorflow::sparse_utils::GetStartIndicesOfEachDenseRow; -using tensorflow::sparse_utils::ParseRowStartIndices; +using ::tensorflow::testing::StatusIs; +using ::testing::MatchesRegex; TEST(SparseUtilsTest, GetStartIndicesOfEachDenseRow) { { @@ -260,4 +262,288 @@ TEST(SparseUtilsTest, FindNextDenseRowStartIndex) { } } +// Returns a shared random number generator. +::tensorflow::random::SimplePhilox& RandomPhilox() { + // Safe initialization of static random generator. + static auto* philox = + new ::tensorflow::random::PhiloxRandom(tensorflow::testing::RandomSeed()); + static auto* rnd = new ::tensorflow::random::SimplePhilox(philox); + return *rnd; +} + +// Fills a tensor of indices with a unique set of random index tuples. +// The `SetType` must be a std::set-like type (e.g. flat_hash_set, btree_set) +// that is used to ensure uniqueness and governs the final index tuple order. +// For example, use a hash set for unordered indices, and sorted set for +// lexicographically ordered indices. The `shape` is used to ensure proper index +// bounds. +template +void FillIndicesWithRandomTuples(const TensorShape& shape, Tensor& indices) { + const int64_t nnz = indices.dim_size(0); + const int64_t ndims = indices.dim_size(1); + + SetType indices_set; + int64_t count = 0; + // Generate nnz unique random tuples. + while (count < nnz) { + std::vector candidate(ndims); + for (int64_t d = 0; d < ndims; ++d) { + candidate[d] = RandomPhilox().Uniform64(shape.dim_size(d)); + } + auto it = indices_set.insert(std::move(candidate)); + if (it.second) { + ++count; + } + } + + // Copy index tuples from set into index tensor. + auto indices_mat = indices.matrix(); + int64_t row = 0; + for (const std::vector& idxs : indices_set) { + for (int64_t col = 0; col < ndims; ++col) { + indices_mat(row, col) = idxs[col]; + } + ++row; + } +} + +// Populates components of a sparse random tensor with provided number of +// non-zeros `max_nnz` and tensor shape `shape`. If `ordered`, output indices +// are ordered lexicographically. 
+void GenerateRandomSparseTensor(int64_t max_nnz, const TensorShape& shape, + bool ordered, Tensor& output_indices, + Tensor& output_values, Tensor& output_shape) { + const int64_t ndims = shape.dims(); + // We cannot generate more elements than the total in the tensor, so + // potentially reduce nnz. + const int64_t nnz = std::min(shape.num_elements(), max_nnz); + output_indices = Tensor(DT_INT64, TensorShape({nnz, ndims})); + output_values = Tensor(DT_FLOAT, TensorShape({nnz})); + output_shape = Tensor(DT_INT64, TensorShape({ndims})); + + // Generate random unique sparse indices. + if (ordered) { + // NOTE: absl::btree_set does not seem to be available in TF OSS. + FillIndicesWithRandomTuples>>(shape, + output_indices); + } else { + FillIndicesWithRandomTuples>>( + shape, output_indices); + } + + auto values_vec = output_values.vec(); + values_vec.setRandom(); + + auto shape_vec = output_shape.vec(); + for (int i = 0; i < shape.dims(); ++i) { + shape_vec(i) = shape.dim_size(i); + } +} + +using ValidateSparseTensorTest = ::testing::TestWithParam; + +TEST_P(ValidateSparseTensorTest, ValidSparseTensorPasses) { + constexpr int kNumNonZeros = 1000; + const TensorShape kTensorShapes[] = { + {}, {3}, {4, 5}, {6, 7, 8}, {9, 10, 11, 12}}; + const IndexValidation index_validation = GetParam(); + const bool ordered = (index_validation == IndexValidation::kOrdered); + for (const TensorShape& test_shape : kTensorShapes) { + Tensor indices, values, shape; + GenerateRandomSparseTensor(kNumNonZeros, test_shape, ordered, indices, + values, shape); + TF_EXPECT_OK((ValidateSparseTensor(indices, values, shape, + index_validation))); + } +} + +TEST_P(ValidateSparseTensorTest, InvalidIndicesRankFails) { + constexpr int kNumNonZeros = 1000; + constexpr int kNumDims = 3; + // Indices tensor must be rank 2, so try rank 0, 1, 3. + const TensorShape kInvalidIndicesShapes[] = { + {}, {kNumNonZeros}, {kNumNonZeros, kNumDims, 4}}; + const IndexValidation index_validation = GetParam(); + for (const TensorShape& invalid_shape : kInvalidIndicesShapes) { + const Tensor indices = Tensor(DT_INT64, invalid_shape); + const Tensor values = Tensor(DT_FLOAT, TensorShape({kNumNonZeros})); + const Tensor shape = Tensor(DT_INT64, TensorShape({kNumDims})); + EXPECT_THAT((ValidateSparseTensor(indices, values, shape, + index_validation)), + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Sparse indices must be rank 2 .*"))); + } +} + +TEST_P(ValidateSparseTensorTest, InvalidValuesRankFails) { + constexpr int kNumNonZeros = 1000; + constexpr int kNumDims = 3; + // Values tensor must be rank 1, so try rank 0, 2. + const TensorShape kInvalidValuesShapes[] = {{}, {kNumNonZeros, 2}}; + const IndexValidation index_validation = GetParam(); + for (const TensorShape& invalid_shape : kInvalidValuesShapes) { + const Tensor indices = + Tensor(DT_INT64, TensorShape({kNumNonZeros, kNumDims})); + const Tensor values = Tensor(DT_FLOAT, invalid_shape); + const Tensor shape = Tensor(DT_INT64, TensorShape({kNumDims})); + EXPECT_THAT((ValidateSparseTensor(indices, values, shape, + index_validation)), + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Sparse values must be rank 1 .*"))); + } +} + +TEST_P(ValidateSparseTensorTest, InvalidShapeRankFails) { + constexpr int kNumNonZeros = 1000; + constexpr int kNumDims = 3; + const IndexValidation index_validation = GetParam(); + // Shape tensor must be rank 1, so try rank 0, 2. 
+ const TensorShape kInvalidShapeShapes[] = {{}, {kNumDims, 2}}; + for (const TensorShape& invalid_shape : kInvalidShapeShapes) { + const Tensor indices = + Tensor(DT_INT64, TensorShape({kNumNonZeros, kNumDims})); + const Tensor values = Tensor(DT_FLOAT, TensorShape({kNumNonZeros})); + const Tensor shape = Tensor(DT_INT64, invalid_shape); + EXPECT_THAT((ValidateSparseTensor(indices, values, shape, + index_validation)), + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Sparse shape must be rank 1 .*"))); + } +} + +TEST_P(ValidateSparseTensorTest, IncompatibleShapesFails) { + constexpr int kNumNonZeros = 1000; + constexpr int kNumDims = 3; + const IndexValidation index_validation = GetParam(); + + const Tensor values = Tensor(DT_FLOAT, TensorShape({kNumNonZeros})); + const Tensor shape = Tensor(DT_INT64, TensorShape({kNumDims})); + + // Indices and values must have the same size in dimension 0 (nnz). + { + const Tensor indices = + Tensor(DT_INT64, TensorShape({kNumNonZeros + 1, kNumDims})); + EXPECT_THAT((ValidateSparseTensor(indices, values, shape, + index_validation)), + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Number of elements in indices .* and " + "values .* do not match"))); + } + + // Each index tuple must have the same size in dimension 1 as the dense + // tensor shape (ndims). + { + const Tensor indices = + Tensor(DT_INT64, TensorShape({kNumNonZeros, kNumDims + 1})); + EXPECT_THAT( + (ValidateSparseTensor(indices, values, shape, + index_validation)), + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Index rank .* and shape rank .* do not match"))); + } +} + +TEST_P(ValidateSparseTensorTest, IndexOutOfBoundsFails) { + constexpr int kNumNonZeros = 1000; + constexpr int kNumTests = 100; + const IndexValidation index_validation = GetParam(); + const bool ordered = (index_validation == IndexValidation::kOrdered); + + const TensorShape kTensorShapes[] = {{3}, {4, 5}, {6, 7, 8}, {9, 10, 11, 12}}; + + for (const TensorShape& test_shape : kTensorShapes) { + Tensor indices, values, shape; + GenerateRandomSparseTensor(kNumNonZeros, test_shape, ordered, indices, + values, shape); + // Access tensor values. + auto indices_mat = indices.matrix(); + for (int test = 0; test < kNumTests; ++test) { + // Pick a random entry and dimension, and make the index out of bounds. + int64_t row = RandomPhilox().Uniform64(indices.dim_size(0)); + int64_t dim = RandomPhilox().Uniform64(indices.dim_size(1)); + int64_t old_val = indices_mat(row, dim); + for (int64_t val : {static_cast(-1), test_shape.dim_size(dim)}) { + indices_mat(row, dim) = val; + Status indices_valid = ValidateSparseTensor( + indices, values, shape, index_validation); + if (index_validation == IndexValidation::kNone) { + TF_EXPECT_OK(indices_valid); + } else { + EXPECT_THAT( + indices_valid, + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Sparse index tuple .* is out of bounds"))) + << indices_mat; + } + } + + // Restore index for next test. 
+ indices_mat(row, dim) = old_val; + } + } +} + +TEST_P(ValidateSparseTensorTest, IndexOutOfOrderFailsForOrderedValidation) { + constexpr int kNumNonZeros = 1000; + constexpr int kNumTests = 100; + const TensorShape kTensorShapes[] = {{3}, {4, 5}, {6, 7, 8}, {9, 10, 11, 12}}; + const IndexValidation index_validation = GetParam(); + const bool ordered = (index_validation == IndexValidation::kOrdered); + + for (const TensorShape& test_shape : kTensorShapes) { + Tensor indices, values, shape; + GenerateRandomSparseTensor(kNumNonZeros, test_shape, ordered, indices, + values, shape); + // Access tensor values. + auto indices_mat = indices.matrix(); + const int64_t nnz = indices.dim_size(0); + const int64_t ndims = indices.dim_size(1); + for (int test = 0; test < kNumTests; ++test) { + // Pick two random index entries to swap. + int64_t row1 = RandomPhilox().Uniform64(nnz); + int64_t row2; + do { + row2 = RandomPhilox().Uniform64(nnz); + } while (row1 == row2); + for (int dim = 0; dim < ndims; ++dim) { + std::swap(indices_mat(row1, dim), indices_mat(row2, dim)); + } + + Status indices_valid = ValidateSparseTensor( + indices, values, shape, index_validation); + if (ordered) { + EXPECT_THAT( + indices_valid, + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Sparse index tuple .* is out of order"))); + } else { + TF_EXPECT_OK(indices_valid); + } + + // Restore index for next test. + for (int dim = 0; dim < ndims; ++dim) { + std::swap(indices_mat(row1, dim), indices_mat(row2, dim)); + } + } + } +} + +INSTANTIATE_TEST_SUITE_P( + ValidateSparseTensorTestSuite, ValidateSparseTensorTest, + ::testing::Values(IndexValidation::kNone, IndexValidation::kOrdered, + IndexValidation::kUnordered), + [](const ::testing::TestParamInfo& + info) { + switch (info.param) { + case IndexValidation::kNone: + return "None"; + case IndexValidation::kUnordered: + return "Unordered"; + case IndexValidation::kOrdered: + return "Ordered"; + } + }); + } // namespace +} // namespace sparse_utils +} // namespace tensorflow diff --git a/tensorflow/core/kernels/stage_op.cc b/tensorflow/core/kernels/stage_op.cc index 55c9db22ddf527..f7bb42f9c52b7d 100644 --- a/tensorflow/core/kernels/stage_op.cc +++ b/tensorflow/core/kernels/stage_op.cc @@ -258,6 +258,8 @@ class StagePeekOp : public OpKernel { core::ScopedUnref scope(buf); Buffer::Tuple tuple; + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(ctx->input(0).shape()), + errors::InvalidArgument("index must be scalar")); std::size_t index = ctx->input(0).scalar()(); OP_REQUIRES_OK(ctx, buf->Peek(index, &tuple)); diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 30f083dae1a41d..3ec2d6bbcc3c17 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -431,6 +431,7 @@ class StridedSliceAssignOp : public OpKernel { StridedSliceAssignOp) TF_CALL_ALL_TYPES(REGISTER_STRIDED_SLICE); +TF_CALL_QUANTIZED_TYPES(REGISTER_STRIDED_SLICE); #undef REGISTER_STRIDED_SLICE diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h index d4dd85eebdeb24..7ec7d868f2263e 100644 --- a/tensorflow/core/kernels/strided_slice_op_impl.h +++ b/tensorflow/core/kernels/strided_slice_op_impl.h @@ -292,7 +292,7 @@ TF_CALL_GPU_ALL_TYPES(DECLARE_FOR_N_GPU); #endif // END GOOGLE_CUDA || TENSORFLOW_USE_ROCM TF_CALL_ALL_TYPES(DECLARE_FOR_N_CPU); - +TF_CALL_QUANTIZED_TYPES(DECLARE_FOR_N_CPU); #undef INSTANTIATE #undef DECLARE_FOR_N_CPU diff --git 
a/tensorflow/core/kernels/summary_audio_op.cc b/tensorflow/core/kernels/summary_audio_op.cc index 09aab681f560aa..6015fe2ab5ebe4 100644 --- a/tensorflow/core/kernels/summary_audio_op.cc +++ b/tensorflow/core/kernels/summary_audio_op.cc @@ -49,6 +49,11 @@ class SummaryAudioOp : public OpKernel { float sample_rate = sample_rate_attr_; if (!has_sample_rate_attr_) { const Tensor& sample_rate_tensor = c->input(2); + OP_REQUIRES(c, + sample_rate_tensor.IsAligned() && + sample_rate_tensor.NumElements() == 1, + errors::InvalidArgument( + "sample_rate must be rank-0 or contain a single value")); sample_rate = sample_rate_tensor.scalar()(); } OP_REQUIRES(c, sample_rate > 0.0f, diff --git a/tensorflow/core/kernels/summary_tensor_op.cc b/tensorflow/core/kernels/summary_tensor_op.cc index e367045b02ab48..730ef6f38e5d62 100644 --- a/tensorflow/core/kernels/summary_tensor_op.cc +++ b/tensorflow/core/kernels/summary_tensor_op.cc @@ -36,6 +36,10 @@ class SummaryTensorOpV2 : public OpKernel { errors::InvalidArgument("tag must be scalar")); const Tensor& tensor = c->input(1); const Tensor& serialized_summary_metadata_tensor = c->input(2); + OP_REQUIRES( + c, + TensorShapeUtils::IsScalar(serialized_summary_metadata_tensor.shape()), + errors::InvalidArgument("serialized_summary_metadata must be scalar")); Summary s; Summary::Value* v = s.add_value(); diff --git a/tensorflow/core/kernels/unsorted_segment_join_op.cc b/tensorflow/core/kernels/unsorted_segment_join_op.cc index 2406f9b0d7c5bd..1d13bd95697e89 100644 --- a/tensorflow/core/kernels/unsorted_segment_join_op.cc +++ b/tensorflow/core/kernels/unsorted_segment_join_op.cc @@ -92,7 +92,15 @@ class UnsortedSegmentJoinOp : public OpKernel { const Tensor& num_segments_tensor = context->input(2); OP_REQUIRES(context, num_segments_tensor.NumElements() != 0, errors::InvalidArgument("Number of segments cannot be empty.")); + OP_REQUIRES(context, + TensorShapeUtils::IsScalar(num_segments_tensor.shape()), + errors::InvalidArgument("Number of segments must be a scalar")); + auto num_segments = num_segments_tensor.scalar()(); + OP_REQUIRES( + context, num_segments >= 0, + errors::InvalidArgument( + "Number of segments must be non-negative but got ", num_segments)); OP_REQUIRES(context, segment_dims != 0, errors::InvalidArgument("Segment_id cannot have rank 0")); diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 8c76caf4f0fed4..0ede70599d05c2 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -2879,6 +2879,10 @@ REGISTER_OP("QuantizeAndDequantizeV2") axis); } else if (axis != -1) { ShapeHandle input; + if (axis >= kint32max) { + return errors::InvalidArgument( + "Axis cannot be >= kint32max value, got ", axis); + } TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; TF_RETURN_IF_ERROR( @@ -2914,6 +2918,10 @@ REGISTER_OP("QuantizeAndDequantizeV4") axis); } else if (axis != -1) { ShapeHandle input; + if (axis >= kint32max) { + return errors::InvalidArgument( + "Axis cannot be >= kint32max value, got ", axis); + } TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; TF_RETURN_IF_ERROR( @@ -2945,6 +2953,10 @@ REGISTER_OP("QuantizeAndDequantizeV4Grad") axis); } else if (axis != -1) { ShapeHandle input; + if (axis >= kint32max) { + return errors::InvalidArgument( + "Axis cannot be >= kint32max value, got ", axis); + } TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; 
TF_RETURN_IF_ERROR( @@ -2981,6 +2993,10 @@ REGISTER_OP("QuantizeAndDequantizeV3") axis); } else if (axis != -1) { ShapeHandle input; + if (axis >= kint32max) { + return errors::InvalidArgument( + "Axis cannot be >= kint32max value, got ", axis); + } TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; TF_RETURN_IF_ERROR( diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 7bc9f8c9ffd294..b5a04f7575f4ea 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1483,17 +1483,18 @@ Status RangeSize(const Tensor* start_t, const Tensor* limit_t, return errors::InvalidArgument("Requires delta != 0"); } - auto size = (std::is_integral::value - ? ((Eigen::numext::abs(limit - start) + - Eigen::numext::abs(delta) - T(1)) / - Eigen::numext::abs(delta)) - : (Eigen::numext::ceil( - Eigen::numext::abs((limit - start) / delta)))); - - // Undefined behaviour if size will not fit into int64_t - if (size > std::numeric_limits::max()) { - return errors::InvalidArgument("Requires ((limit - start) / delta) <= ", - std::numeric_limits::max()); + int64_t size; + if (std::is_integral::value) { + size = Eigen::divup(static_cast(Eigen::numext::abs(limit - start)), + static_cast(Eigen::numext::abs(delta))); + } else { + auto size_auto = + Eigen::numext::ceil(Eigen::numext::abs((limit - start) / delta)); + if (size_auto > std::numeric_limits::max()) { + return errors::InvalidArgument("Requires ((limit - start) / delta) <= ", + std::numeric_limits::max()); + } + size = static_cast(size_auto); } c->set_output(0, c->Vector(static_cast(size))); diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 5041fd80750ff8..36da19949d9741 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -60,6 +60,13 @@ Status FractionalPoolShapeFn(InferenceContext* c) { } } + for (std::size_t i = 0; i < pooling_ratio.size(); ++i) { + if (pooling_ratio[i] < 1) { + return errors::InvalidArgument( + "pooling_ratio cannot be smaller than 1, got: ", pooling_ratio[i]); + } + } + c->set_output(0, c->MakeShape(output_dims)); c->set_output(1, c->Vector(output_dims[1])); c->set_output(2, c->Vector(output_dims[2])); @@ -574,7 +581,7 @@ REGISTER_OP("FusedResizeAndPadConv2D") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .SetShapeFn([](InferenceContext* c) { - return CommonFusedConvCalculations(c, true /* has_resize */); + return CommonFusedConvCalculations(c, /*has_resize=*/true); }); REGISTER_OP("FusedPadConv2D") @@ -587,7 +594,7 @@ REGISTER_OP("FusedPadConv2D") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .SetShapeFn([](InferenceContext* c) { - return CommonFusedConvCalculations(c, false /* has_resize */); + return CommonFusedConvCalculations(c, /*has_resize=*/false); }); // -------------------------------------------------------------------------- diff --git a/tensorflow/core/ops/nn_ops_test.cc b/tensorflow/core/ops/nn_ops_test.cc index 469a9015a17c98..41940da69ef4c7 100644 --- a/tensorflow/core/ops/nn_ops_test.cc +++ b/tensorflow/core/ops/nn_ops_test.cc @@ -523,7 +523,8 @@ TEST(NNOpsTest, FractionalPool_ShapeFn) { .Finalize(&op.node_def)); }; - set_op(std::vector{2.0f, 1, 1 / 1.5f, 1 / 2.0f}); + // pooling_ratio must >= 1.0 + set_op(std::vector{2.0f, 1, 1.5f, 4.0f}); // Rank check. 
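The `RangeSize` change in the math_ops.cc hunk above splits the size computation into an integral path (a divide-and-round-up on the absolute difference, expressed with `Eigen::divup`) and a floating-point path with an explicit `int64_t` fit check. The following is a minimal stand-alone sketch of the same arithmetic, not the TensorFlow implementation; the helper names are invented and Eigen is deliberately avoided.

```c++
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <limits>

// Illustrative only: size of range(start, limit, delta) for an integer range,
// using the divide-and-round-up form the patch expresses with Eigen::divup.
// Assumes delta != 0 (checked by the caller) and that |limit - start| itself
// does not overflow the subtraction.
int64_t IntegerRangeSize(int64_t start, int64_t limit, int64_t delta) {
  const uint64_t span = static_cast<uint64_t>(std::llabs(limit - start));
  const uint64_t step = static_cast<uint64_t>(std::llabs(delta));
  return static_cast<int64_t>((span + step - 1) / step);  // ceil(span / step)
}

// Floating-point ranges keep ceil(|limit - start| / delta), but the result is
// range-checked before it is narrowed to int64_t.
bool FloatRangeSize(double start, double limit, double delta, int64_t* size) {
  const double raw = std::ceil(std::fabs((limit - start) / delta));
  if (raw > static_cast<double>(std::numeric_limits<int64_t>::max())) {
    return false;  // would not fit into int64_t
  }
  *size = static_cast<int64_t>(raw);
  return true;
}

int main() {
  std::cout << IntegerRangeSize(0, 10, 3) << "\n";  // 4 -> {0, 3, 6, 9}
  int64_t n = 0;
  std::cout << FloatRangeSize(0.0, 1.0, 0.3, &n) << " " << n << "\n";  // 1 4
  return 0;
}
```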
INFER_ERROR("must be rank 4", op, "[?,?,?]"); @@ -532,11 +533,11 @@ TEST(NNOpsTest, FractionalPool_ShapeFn) { INFER_OK(op, "?", "[?,?,?,?];[?];[?]"); INFER_OK(op, "[?,?,?,?]", "[?,?,?,?];[?];[?]"); - INFER_OK(op, "[10,20,30,40]", "[5,20,45,80];[20];[45]"); - INFER_OK(op, "[?,20,30,40]", "[?,20,45,80];[20];[45]"); - INFER_OK(op, "[10,?,30,40]", "[5,?,45,80];[?];[45]"); - INFER_OK(op, "[10,20,?,40]", "[5,20,?,80];[20];[?]"); - INFER_OK(op, "[10,20,30,?]", "[5,20,45,?];[20];[45]"); + INFER_OK(op, "[10,20,30,40]", "[5,20,20,10];[20];[20]"); + INFER_OK(op, "[?,20,30,40]", "[?,20,20,10];[20];[20]"); + INFER_OK(op, "[10,?,30,40]", "[5,?,20,10];[?];[20]"); + INFER_OK(op, "[10,20,?,40]", "[5,20,?,10];[20];[?]"); + INFER_OK(op, "[10,20,30,?]", "[5,20,20,?];[20];[20]"); // Wrong number of values for pooling_ratio. set_op(std::vector{.5, 1.0, 1.5}); diff --git a/tensorflow/core/platform/default/logging.h b/tensorflow/core/platform/default/logging.h index aa7700edbc14b2..621f9ffc9044da 100644 --- a/tensorflow/core/platform/default/logging.h +++ b/tensorflow/core/platform/default/logging.h @@ -85,7 +85,7 @@ class LogMessage : public std::basic_ostringstream { // that the ternary VLOG() implementation is balanced, type wise. struct Voidifier { template - void operator&(const T&)const {} + void operator&(const T&) const {} }; // LogMessageFatal ensures the process will exit in failure after @@ -348,11 +348,13 @@ string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) { } // Helper functions for CHECK_OP macro. -// The (int, int) specialization works around the issue that the compiler +// We use the full name Check_EQ, Check_NE, etc. in case the file including +// base/logging.h provides its own #defines for the simpler names EQ, NE, etc. +// This happens if, for example, those are used as token names in a +// yacc grammar. +// The (int, int) overload works around the issue that the compiler // will not instantiate the template version of the function on values of // unnamed enum type - see comment below. -// The (size_t, int) and (int, size_t) specialization are to handle unsigned -// comparison errors while still being thorough with the comparison. #define TF_DEFINE_CHECK_OP_IMPL(name, op) \ template \ inline string* name##Impl(const T1& v1, const T2& v2, \ @@ -364,34 +366,77 @@ string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) { } \ inline string* name##Impl(int v1, int v2, const char* exprtext) { \ return name##Impl(v1, v2, exprtext); \ - } \ - inline string* name##Impl(const size_t v1, const int v2, \ - const char* exprtext) { \ - if (TF_PREDICT_FALSE(v2 < 0)) { \ - return ::tensorflow::internal::MakeCheckOpString(v1, v2, exprtext); \ - } \ - return name##Impl(v1, v2, exprtext); \ - } \ - inline string* name##Impl(const int v1, const size_t v2, \ - const char* exprtext) { \ - if (TF_PREDICT_FALSE(v2 >= std::numeric_limits::max())) { \ - return ::tensorflow::internal::MakeCheckOpString(v1, v2, exprtext); \ - } \ - const size_t uval = (size_t)((unsigned)v2); \ - return name##Impl(v1, uval, exprtext); \ } -// We use the full name Check_EQ, Check_NE, etc. in case the file including -// base/logging.h provides its own #defines for the simpler names EQ, NE, etc. -// This happens if, for example, those are used as token names in a -// yacc grammar. -TF_DEFINE_CHECK_OP_IMPL(Check_EQ, - ==) // Compilation error with CHECK_EQ(NULL, x)? -TF_DEFINE_CHECK_OP_IMPL(Check_NE, !=) // Use CHECK(x == NULL) instead. 
+// The (size_t, int) and (int, size_t) specialization are to handle unsigned +// comparison errors while still being thorough with the comparison. + +TF_DEFINE_CHECK_OP_IMPL(Check_EQ, ==) +// Compilation error with CHECK_EQ(NULL, x)? +// Use CHECK(x == NULL) instead. + +inline string* Check_EQImpl(int v1, size_t v2, const char* exprtext) { + if (TF_PREDICT_FALSE(v1 < 0)) + ::tensorflow::internal::MakeCheckOpString(v1, v2, exprtext); + + return Check_EQImpl(size_t(v1), v2, exprtext); +} + +inline string* Check_EQImpl(size_t v1, int v2, const char* exprtext) { + return Check_EQImpl(v2, v1, exprtext); +} + +TF_DEFINE_CHECK_OP_IMPL(Check_NE, !=) + +inline string* Check_NEImpl(int v1, size_t v2, const char* exprtext) { + if (v1 < 0) return NULL; + + return Check_NEImpl(size_t(v1), v2, exprtext); +} + +inline string* Check_NEImpl(size_t v1, int v2, const char* exprtext) { + return Check_NEImpl(v2, v1, exprtext); +} + TF_DEFINE_CHECK_OP_IMPL(Check_LE, <=) + +inline string* Check_LEImpl(int v1, size_t v2, const char* exprtext) { + if (v1 <= 0) return NULL; + + return Check_LEImpl(size_t(v1), v2, exprtext); +} + +inline string* Check_LEImpl(size_t v1, int v2, const char* exprtext) { + if (TF_PREDICT_FALSE(v2 < 0)) + return ::tensorflow::internal::MakeCheckOpString(v1, v2, exprtext); + return Check_LEImpl(v1, size_t(v2), exprtext); +} + TF_DEFINE_CHECK_OP_IMPL(Check_LT, <) -TF_DEFINE_CHECK_OP_IMPL(Check_GE, >=) -TF_DEFINE_CHECK_OP_IMPL(Check_GT, >) + +inline string* Check_LTImpl(int v1, size_t v2, const char* exprtext) { + if (v1 < 0) return NULL; + + return Check_LTImpl(size_t(v1), v2, exprtext); +} + +inline string* Check_LTImpl(size_t v1, int v2, const char* exprtext) { + if (v2 < 0) + return ::tensorflow::internal::MakeCheckOpString(v1, v2, exprtext); + return Check_LTImpl(v1, size_t(v2), exprtext); +} + +// Implement GE,GT in terms of LE,LT +template +inline string* Check_GEImpl(const T1& v1, const T2& v2, const char* exprtext) { + return Check_LEImpl(v2, v1, exprtext); +} + +template +inline string* Check_GTImpl(const T1& v1, const T2& v2, const char* exprtext) { + return Check_LTImpl(v2, v1, exprtext); +} + #undef TF_DEFINE_CHECK_OP_IMPL // In optimized mode, use CheckOpString to hint to compiler that diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 157ce4e661b8d0..193a3c0d31da27 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -22,7 +22,7 @@ limitations under the License. // tensorflow/tools/pip_package/setup.py #define TF_MAJOR_VERSION 2 #define TF_MINOR_VERSION 8 -#define TF_PATCH_VERSION 0 +#define TF_PATCH_VERSION 4 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") diff --git a/tensorflow/core/util/BUILD b/tensorflow/core/util/BUILD index c9aa828e38440c..e05291f14a9829 100644 --- a/tensorflow/core/util/BUILD +++ b/tensorflow/core/util/BUILD @@ -530,6 +530,9 @@ tf_cuda_library( cc_library( name = "overflow", hdrs = ["overflow.h"], + visibility = [ + "//tensorflow:internal", + ], deps = [ "//tensorflow/core/platform:logging", "//tensorflow/core/platform:macros", diff --git a/tensorflow/core/util/bcast.h b/tensorflow/core/util/bcast.h index cbd7a6f8693e35..531c47aec3dab8 100644 --- a/tensorflow/core/util/bcast.h +++ b/tensorflow/core/util/bcast.h @@ -134,7 +134,7 @@ BCastList::BCastList(const BCastList::Vec (&x)[N], typedef BCastList::Vec Vec; // Safely multiplies dimensions taking into account symbolic shapes. 
- auto mul_dims = [](int64_t dim1, int64_t dim2) -> int64 { + auto mul_dims = [](int64_t dim1, int64_t dim2) -> int64_t { return dim1 != 0 && dim2 != 0 && (dim1 < 0 || dim2 < 0) ? -1 : dim1 * dim2; }; @@ -199,7 +199,7 @@ BCastList::BCastList(const BCastList::Vec (&x)[N], } Vec output; bool output_dim_set = false; - int output_dim = -1; + int64_t output_dim = -1; bool none_is_one = true; bool set_one = false; for (int j = 0; j < largest_rank; ++j) { diff --git a/tensorflow/core/util/bcast_test.cc b/tensorflow/core/util/bcast_test.cc index 60eb7859ec3f15..f33483a5846d67 100644 --- a/tensorflow/core/util/bcast_test.cc +++ b/tensorflow/core/util/bcast_test.cc @@ -375,6 +375,13 @@ TEST(BCastTest, Basic_Tensor_Scalar) { "[11,7,5,3,2]" "[11,7,5,3,2]" "[0,1,2,3,4][]"); + + // int32 edge-case: + EXPECT_EQ(BCast({1, 2147483648}, {1}), + "[2147483648][1][1][2147483648]" + "[2147483648]" + "[1,2147483648]" + "[0][0,1]"); } TEST(BCastTest, Basic_Tensor_With_DimSize_1_Scalar) { diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc index ca1b90abbb09c9..4ce2aefddc218a 100644 --- a/tensorflow/core/util/strided_slice_op.cc +++ b/tensorflow/core/util/strided_slice_op.cc @@ -79,6 +79,18 @@ struct StridedSliceDenseSpec { template static Status TF_MUST_USE_RESULT BuildDenseSpec( const StridedSliceSparseSpec& sparse, StridedSliceDenseSpec* dense) { + if (dense->dims < 0) { + return errors::InvalidArgument("Unexpected negative dense.dims: %d", + dense->dims); + } + + if (dense->dims >= 1024) { + // We do not expect to see tensors with rank >= 1024, it must mean that + // there is a bug somewhere. + return errors::InvalidArgument("Unexpected large dense.dims: %d", + dense->dims); + } + // Build expanded begin, end, strides, begin_mask, end_mask // to remove any ellipsis dense->begin.resize(dense->dims); diff --git a/tensorflow/core/util/tensor_slice_writer.cc b/tensorflow/core/util/tensor_slice_writer.cc index a74e2a04cedbe5..731bce308fc9fb 100644 --- a/tensorflow/core/util/tensor_slice_writer.cc +++ b/tensorflow/core/util/tensor_slice_writer.cc @@ -131,6 +131,16 @@ Status TensorSliceWriter::Finish() { /* static */ size_t TensorSliceWriter::MaxBytesPerElement(DataType dt) { + size_t max_bytes_per_element = + TensorSliceWriter::MaxBytesPerElementOrZero(dt); + if (max_bytes_per_element == 0) { + LOG(FATAL) << "MaxBytesPerElement not implemented for dtype: " << dt; + } + return max_bytes_per_element; +} + +/* static */ +size_t TensorSliceWriter::MaxBytesPerElementOrZero(DataType dt) { switch (dt) { case DT_FLOAT: return 4; @@ -170,9 +180,8 @@ size_t TensorSliceWriter::MaxBytesPerElement(DataType dt) { case DT_STRING: case DT_BFLOAT16: default: - LOG(FATAL) << "MaxBytesPerElement not implemented for dtype: " << dt; + return 0; } - return 0; } template <> diff --git a/tensorflow/core/util/tensor_slice_writer.h b/tensorflow/core/util/tensor_slice_writer.h index 01f2e62dfbd2bc..9aa51c29cb323d 100644 --- a/tensorflow/core/util/tensor_slice_writer.h +++ b/tensorflow/core/util/tensor_slice_writer.h @@ -68,6 +68,8 @@ class TensorSliceWriter { static size_t MaxBytesPerElement(DataType dt); private: + static size_t MaxBytesPerElementOrZero(DataType dt); + static constexpr size_t kMaxMessageBytes = 1LL << 31; // Filling in the TensorProto in a SavedSlice will add the following // header bytes, in addition to the data: @@ -162,9 +164,15 @@ Status TensorSliceWriter::Add(const string& name, const TensorShape& shape, template Status TensorSliceWriter::SaveData(const T* data, int64_t 
num_elements, SavedSlice* ss) { - size_t size_bound = - ss->ByteSize() + kTensorProtoHeaderBytes + - (MaxBytesPerElement(DataTypeToEnum::value) * num_elements); + size_t max_bytes_per_element = + MaxBytesPerElementOrZero(DataTypeToEnum::value); + if (max_bytes_per_element == 0) { + return errors::InvalidArgument( + "Tensor slice serialization not implemented for dtype ", + DataTypeToEnum::value); + } + size_t size_bound = ss->ByteSize() + kTensorProtoHeaderBytes + + (max_bytes_per_element * num_elements); if (size_bound > kMaxMessageBytes) { return errors::InvalidArgument( "Tensor slice is too large to serialize (conservative estimate: ", diff --git a/tensorflow/core/util/tensor_slice_writer_test.cc b/tensorflow/core/util/tensor_slice_writer_test.cc index 51aa781af11137..d09f92eed97553 100644 --- a/tensorflow/core/util/tensor_slice_writer_test.cc +++ b/tensorflow/core/util/tensor_slice_writer_test.cc @@ -15,17 +15,19 @@ limitations under the License. #include "tensorflow/core/util/tensor_slice_writer.h" +#include #include +#include +#include #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/lib/core/stringpiece.h" -#include "tensorflow/core/lib/io/path.h" -#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/path.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/error_codes.pb.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/saved_tensor_slice_util.h" #include "tensorflow/core/util/tensor_slice_reader.h" @@ -350,6 +352,17 @@ TEST(TensorSliceWriteTest, SizeErrors) { } } +TEST(TensorSliceWriterTest, InvalidInput) { + SavedSlice ss; + std::array data; + std::fill(data.begin(), data.end(), 1234); + Status s = TensorSliceWriter::SaveData(data.data(), data.size(), &ss); + EXPECT_EQ(s.code(), error::INVALID_ARGUMENT); + EXPECT_TRUE(absl::StrContains( + s.error_message(), + "Tensor slice serialization not implemented for dtype")); +} + } // namespace checkpoint } // namespace tensorflow diff --git a/tensorflow/lite/g3doc/guide/build_cmake_pip.md b/tensorflow/lite/g3doc/guide/build_cmake_pip.md index f6d000109fb079..76e710c5b4abfa 100644 --- a/tensorflow/lite/g3doc/guide/build_cmake_pip.md +++ b/tensorflow/lite/g3doc/guide/build_cmake_pip.md @@ -23,7 +23,7 @@ PYTHON=python3 tensorflow/lite/tools/pip_package/build_pip_package_with_cmake.sh ``` **Note:** If you have multiple Python interpreters available, specify the exact -Python version with `PYTHON` variable. (Currently, it supports Python 3.5 or +Python version with `PYTHON` variable. (Currently, it supports Python 3.7 or higher) ## ARM cross compilation @@ -32,26 +32,16 @@ For ARM cross compilation, it's recommended to use Docker since it makes easier to setup cross build environment. Also you needs a `target` option to figure out the target architecture. -There is a helper script `tensorflow/tools/ci_build/ci_build.sh` available to -invoke a build command using a pre-defined Docker container. On a Docker host -machine, you can run a build command with the `container` name and the `target` -name as followings. +There is a helper tool in Makefile `tensorflow/lite/tools/pip_package/Makefile` +available to invoke a build command using a pre-defined Docker container. 
On a +Docker host machine, you can run a build command as followings. ```sh -tensorflow/tools/ci_build/ci_build.sh \ - tensorflow/lite/tools/pip_package/build_pip_package_with_cmake.sh +make -C tensorflow/lite/tools/pip_package docker-build \ + TENSORFLOW_TARGET= PYTHON_VERSION= ``` -### Available Docker containers - -You need to select ARM cross build container for your target Python interpreter -version. Here is the list of supported containers. - -Container | Supported Python version ------------ | ------------------------ -PI-PYTHON37 | Python 3.7 -PI-PYTHON38 | Python 3.8 -PI-PYTHON39 | Python 3.9 +**Note:** Python version 3.7 or higher is supported. ### Available target names @@ -74,15 +64,15 @@ Here are some example commands you can use. #### armhf target for Python 3.7 ```sh -tensorflow/tools/ci_build/ci_build.sh PI-PYTHON37 \ - tensorflow/lite/tools/pip_package/build_pip_package_with_cmake.sh armhf +make -C tensorflow/lite/tools/pip_package docker-build \ + TENSORFLOW_TARGET=armhf PYTHON_VERSION=3.7 ``` #### aarch64 target for Python 3.8 ```sh -tensorflow/tools/ci_build/ci_build.sh PI-PYTHON38 \ - tensorflow/lite/tools/pip_package/build_pip_package_with_cmake.sh aarch64 +make -C tensorflow/lite/tools/pip_package docker-build \ + TENSORFLOW_TARGET=aarch64 PYTHON_VERSION=3.8 ``` #### How to use a custom toolchain? diff --git a/tensorflow/lite/kernels/comparisons.cc b/tensorflow/lite/kernels/comparisons.cc index d0a1876c5c654f..c3824c1db01706 100644 --- a/tensorflow/lite/kernels/comparisons.cc +++ b/tensorflow/lite/kernels/comparisons.cc @@ -81,6 +81,17 @@ TfLiteStatus ComparisonPrepareStringAllowed(TfLiteContext* context, return ComparisonPrepareCommon(context, node, true); } +void QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, + int* left_shift) { + if (double_multiplier < 1.0) { + QuantizeMultiplierSmallerThanOneExp(double_multiplier, quantized_multiplier, + left_shift); + } else { + QuantizeMultiplierGreaterThanOne(double_multiplier, quantized_multiplier, + left_shift); + } +} + template opname> void ComparisonQuantized(const TfLiteTensor* input1, const TfLiteTensor* input2, TfLiteTensor* output, bool requires_broadcast) { @@ -90,13 +101,11 @@ void ComparisonQuantized(const TfLiteTensor* input1, const TfLiteTensor* input2, const int left_shift = 8; int32 input1_multiplier; - int input1_shift; - QuantizeMultiplierSmallerThanOneExp(input1->params.scale, - &input1_multiplier, &input1_shift); int32 input2_multiplier; + int input1_shift; int input2_shift; - QuantizeMultiplierSmallerThanOneExp(input2->params.scale, - &input2_multiplier, &input2_shift); + QuantizeMultiplier(input1->params.scale, &input1_multiplier, &input1_shift); + QuantizeMultiplier(input2->params.scale, &input2_multiplier, &input2_shift); ComparisonParams op_params; op_params.left_shift = left_shift; diff --git a/tensorflow/lite/kernels/comparisons_test.cc b/tensorflow/lite/kernels/comparisons_test.cc index f8cf6dee74c4bf..074d0f1f61513a 100644 --- a/tensorflow/lite/kernels/comparisons_test.cc +++ b/tensorflow/lite/kernels/comparisons_test.cc @@ -653,6 +653,26 @@ TEST(ComparisonsTest, QuantizedInt8GreaterWithBroadcast) { } } +TEST(ComparisonsTest, + QuantizedInt8GreaterWithBroadcastMultiplierGreaterThanOne) { + const float kMin = -127.f; + const float kMax = 127.f; + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + ComparisonOpModel model({TensorType_INT8, test_shapes[i], kMin, kMax}, + {TensorType_INT8, {}, kMin, 
kMax}, TensorType_INT8, + BuiltinOperator_GREATER); + model.QuantizeAndPopulate(model.input1(), + {572, -2, -71, 8, 11, 20}); + model.QuantizeAndPopulate(model.input2(), {8}); + model.Invoke(); + EXPECT_THAT(model.GetOutput(), + ElementsAre(true, false, false, false, true, true)) + << "With shape number " << i; + } +} + TEST(ComparisonsTest, QuantizedUInt8GreaterEqualWithBroadcast) { const float kMin = -1.f; const float kMax = 128.f; diff --git a/tensorflow/lite/kernels/gather_nd.cc b/tensorflow/lite/kernels/gather_nd.cc index c39917b478505f..47e4119b5e57ac 100644 --- a/tensorflow/lite/kernels/gather_nd.cc +++ b/tensorflow/lite/kernels/gather_nd.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include +#include "tensorflow/lite/c/c_api_types.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" #include "tensorflow/lite/kernels/internal/reference/reference_ops.h" @@ -102,13 +103,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } template -TfLiteStatus GatherNd(const TfLiteTensor* params, const TfLiteTensor* indices, - TfLiteTensor* output) { - reference_ops::GatherNd( +TfLiteStatus GatherNd(TfLiteContext* context, const TfLiteTensor* params, + const TfLiteTensor* indices, TfLiteTensor* output) { + const TfLiteStatus status = reference_ops::GatherNd( GetTensorShape(params), GetTensorData(params), GetTensorShape(indices), GetTensorData(indices), GetTensorShape(output), GetTensorData(output)); - return kTfLiteOk; + if (status != kTfLiteOk) { + TF_LITE_KERNEL_LOG(context, "gather_nd index out of bounds"); + } + return status; } template @@ -136,17 +140,17 @@ TfLiteStatus EvalGatherNd(TfLiteContext* context, const TfLiteTensor* params, switch (params->type) { case kTfLiteFloat32: - return GatherNd(params, indices, output); + return GatherNd(context, params, indices, output); case kTfLiteUInt8: - return GatherNd(params, indices, output); + return GatherNd(context, params, indices, output); case kTfLiteInt8: - return GatherNd(params, indices, output); + return GatherNd(context, params, indices, output); case kTfLiteInt16: - return GatherNd(params, indices, output); + return GatherNd(context, params, indices, output); case kTfLiteInt32: - return GatherNd(params, indices, output); + return GatherNd(context, params, indices, output); case kTfLiteInt64: - return GatherNd(params, indices, output); + return GatherNd(context, params, indices, output); case kTfLiteString: return GatherNdString(params, indices, output); default: diff --git a/tensorflow/lite/kernels/gather_nd_test.cc b/tensorflow/lite/kernels/gather_nd_test.cc index 1e9a6fce252255..99768de0ba976f 100644 --- a/tensorflow/lite/kernels/gather_nd_test.cc +++ b/tensorflow/lite/kernels/gather_nd_test.cc @@ -73,6 +73,22 @@ TEST(GatherNdOpTest, ElementIndexingIntoMatrix) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({1.1, 2.2})); } +TEST(GatherNdOpTest, ErrorOnOutOfBoundsTooLarge) { + GatherNdOpModel m({TensorType_FLOAT32, {2, 2}}, {TensorType_INT32, {2, 2}}); + m.SetInput({1.1, 1.2, 2.1, 2.2}); + m.SetPositions({0, 0, 2, 0}); + EXPECT_EQ(m.Invoke(), kTfLiteError); + m.SetPositions({0, 0, 1, 2}); + EXPECT_EQ(m.Invoke(), kTfLiteError); +} + +TEST(GatherNdOpTest, ErrorOnOutOfBoundsNegative) { + GatherNdOpModel m({TensorType_FLOAT32, {2, 2}}, {TensorType_INT32, {2, 2}}); + m.SetInput({1.1, 1.2, 2.1, 2.2}); + m.SetPositions({1, -1, 1, 1}); + EXPECT_EQ(m.Invoke(), kTfLiteError); +} + 
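The gather_nd changes above make the kernel reject indices that would read outside the `params` buffer instead of silently copying out-of-bounds memory. A condensed sketch of that bounds check on a flattened offset is shown below; `GatherRows` is an invented helper that only mirrors the idea, not the TFLite reference implementation.

```c++
#include <cstdint>
#include <iostream>
#include <vector>

// Sketch only: gather rows of a row-major [rows, cols] matrix by row index,
// returning false (analogous to kTfLiteError) when a flattened offset would
// fall outside the parameter buffer.
bool GatherRows(const std::vector<float>& params, int cols,
                const std::vector<int32_t>& indices,
                std::vector<float>* output) {
  output->clear();
  for (int32_t idx : indices) {
    const int64_t from_pos = static_cast<int64_t>(idx) * cols;
    if (from_pos < 0 ||
        from_pos + cols > static_cast<int64_t>(params.size())) {
      return false;  // out-of-bounds read rejected, nothing is copied
    }
    output->insert(output->end(), params.begin() + from_pos,
                   params.begin() + from_pos + cols);
  }
  return true;
}

int main() {
  const std::vector<float> params = {1.1f, 1.2f, 2.1f, 2.2f};  // shape [2, 2]
  std::vector<float> out;
  std::cout << GatherRows(params, 2, {1, 0}, &out) << "\n";  // 1: in bounds
  std::cout << GatherRows(params, 2, {2}, &out) << "\n";     // 0: too large
  std::cout << GatherRows(params, 2, {-1}, &out) << "\n";    // 0: negative
  return 0;
}
```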
TEST(GatherNdOpTest, SliceIndexingIntoMatrix) { GatherNdOpModel m({TensorType_FLOAT32, {2, 2}}, {TensorType_INT32, {2, 1}}); m.SetInput({1.1, 1.2, 2.1, 2.2}); diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 3ee17f59d2626b..04576843a1efad 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -1043,7 +1043,6 @@ cc_test( srcs = [ "strided_slice_logic_test.cc", ], - shard_count = 4, deps = [ ":strided_slice_logic", "@com_google_googletest//:gtest_main", diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h index a5159976283cc0..b95466fa73f8e9 100644 --- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h @@ -5000,108 +5000,6 @@ inline void Slice(const tflite::SliceParams& op_params, return Slice(op_params, input_shape, output_shape, &writer); } -// Note: This implementation is only optimized for the case where the inner -// stride == 1. -template -inline void StridedSlice(const tflite::StridedSliceParams& op_params, - const RuntimeShape& unextended_input_shape, - const RuntimeShape& unextended_output_shape, - SequentialTensorWriter* writer) { - using strided_slice::LoopCondition; - using strided_slice::StartForAxis; - using strided_slice::StopForAxis; - - ruy::profiler::ScopeLabel label("StridedSlice"); - - // Note that the output_shape is not used herein. - tflite::StridedSliceParams params_copy = op_params; - - TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 5); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 5); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(5, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(5, unextended_output_shape); - - // Reverse and pad to 5 dimensions because that is what the runtime code - // requires (ie. all shapes must be 5D and are given backwards). 
- strided_slice::StridedSlicePadIndices(¶ms_copy, 5); - - const int start_0 = StartForAxis(params_copy, input_shape, 0); - const int stop_0 = StopForAxis(params_copy, input_shape, 0, start_0); - const int start_1 = StartForAxis(params_copy, input_shape, 1); - const int stop_1 = StopForAxis(params_copy, input_shape, 1, start_1); - const int start_2 = StartForAxis(params_copy, input_shape, 2); - const int stop_2 = StopForAxis(params_copy, input_shape, 2, start_2); - const int start_3 = StartForAxis(params_copy, input_shape, 3); - const int stop_3 = StopForAxis(params_copy, input_shape, 3, start_3); - const int start_4 = StartForAxis(params_copy, input_shape, 4); - const int stop_4 = StopForAxis(params_copy, input_shape, 4, start_4); - const bool inner_stride_is_1 = params_copy.strides[4] == 1; - - for (int offset_0 = start_0 * input_shape.Dims(1), - end_0 = stop_0 * input_shape.Dims(1), - step_0 = params_copy.strides[0] * input_shape.Dims(1); - !LoopCondition(offset_0, end_0, params_copy.strides[0]); - offset_0 += step_0) { - for (int offset_1 = (offset_0 + start_1) * input_shape.Dims(2), - end_1 = (offset_0 + stop_1) * input_shape.Dims(2), - step_1 = params_copy.strides[1] * input_shape.Dims(2); - !LoopCondition(offset_1, end_1, params_copy.strides[1]); - offset_1 += step_1) { - for (int offset_2 = (offset_1 + start_2) * input_shape.Dims(3), - end_2 = (offset_1 + stop_2) * input_shape.Dims(3), - step_2 = params_copy.strides[2] * input_shape.Dims(3); - !LoopCondition(offset_2, end_2, params_copy.strides[2]); - offset_2 += step_2) { - for (int offset_3 = (offset_2 + start_3) * input_shape.Dims(4), - end_3 = (offset_2 + stop_3) * input_shape.Dims(4), - step_3 = params_copy.strides[3] * input_shape.Dims(4); - !LoopCondition(offset_3, end_3, params_copy.strides[3]); - offset_3 += step_3) { - // When the stride is 1, the inner loop is equivalent to the - // optimized slice inner loop. Otherwise, it is identical to the - // strided_slice reference implementation inner loop. 
- if (inner_stride_is_1) { - const int len = stop_4 - start_4; - if (len > 0) { - writer->WriteN(offset_3 + start_4, len); - } - } else { - for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4; - !LoopCondition(offset_4, end_4, params_copy.strides[4]); - offset_4 += params_copy.strides[4]) { - writer->Write(offset_4); - } - } - } - } - } - } -} - -template -inline void StridedSlice(const tflite::StridedSliceParams& op_params, - const RuntimeShape& unextended_input_shape, - const T* input_data, - const RuntimeShape& unextended_output_shape, - T* output_data) { - SequentialTensorWriter writer(input_data, output_data); - StridedSlice(op_params, unextended_input_shape, unextended_output_shape, - &writer); -} - -template -inline void StridedSlice(const tflite::StridedSliceParams& op_params, - const RuntimeShape& unextended_input_shape, - const TfLiteTensor* input, - const RuntimeShape& unextended_output_shape, - TfLiteTensor* output) { - SequentialTensorWriter writer(input, output); - StridedSlice(op_params, unextended_input_shape, unextended_output_shape, - &writer); -} - template void Minimum(const RuntimeShape& input1_shape, const T* input1_data, const T* input2_data, const RuntimeShape& output_shape, diff --git a/tensorflow/lite/kernels/internal/reference/conv3d_transpose.h b/tensorflow/lite/kernels/internal/reference/conv3d_transpose.h index d0e2ef3026e4a2..322b3c594555e3 100644 --- a/tensorflow/lite/kernels/internal/reference/conv3d_transpose.h +++ b/tensorflow/lite/kernels/internal/reference/conv3d_transpose.h @@ -111,14 +111,13 @@ inline void Conv3DTranspose( if (bias_data) { const int outer_size = batches * output_depth * output_height * output_width; - const int num_channels = input_shape.Dims(4); for (int n = 0; n < outer_size; ++n) { for (int c = 0; c < output_num_channels; ++c) { data_ptr[c] = ActivationFunctionWithMinMax(data_ptr[c] + bias_data[c], float_activation_min, float_activation_max); } - data_ptr += num_channels; + data_ptr += output_num_channels; } } else { const int flat_size = output_shape.FlatSize(); diff --git a/tensorflow/lite/kernels/internal/reference/reference_ops.h b/tensorflow/lite/kernels/internal/reference/reference_ops.h index 4c6b867a59ddde..f96c9a00683dd3 100644 --- a/tensorflow/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/lite/kernels/internal/reference/reference_ops.h @@ -29,6 +29,7 @@ limitations under the License. #include "third_party/eigen3/Eigen/Core" #include "fixedpoint/fixedpoint.h" #include "ruy/profiler/instrumentation.h" // from @ruy +#include "tensorflow/lite/c/c_api_types.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" #include "tensorflow/lite/kernels/internal/quantization_util.h" @@ -971,23 +972,31 @@ inline GatherNdHelperResult GatherNdHelper(const RuntimeShape& params_shape, return ret; } +// Implements GatherNd. +// Returns an error if any of the indices_data would cause an out of bounds +// memory read. 
template -inline void GatherNd(const RuntimeShape& params_shape, - const ParamsT* params_data, - const RuntimeShape& indices_shape, - const IndicesT* indices_data, - const RuntimeShape& output_shape, ParamsT* output_data) { +inline TfLiteStatus GatherNd(const RuntimeShape& params_shape, + const ParamsT* params_data, + const RuntimeShape& indices_shape, + const IndicesT* indices_data, + const RuntimeShape& output_shape, + ParamsT* output_data) { ruy::profiler::ScopeLabel label("GatherNd"); const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape); for (int i = 0; i < res.n_slices; ++i) { - int from_pos = 0; + int64_t from_pos = 0; for (int j = 0; j < res.indices_nd; ++j) { from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j]; } + if (from_pos < 0 || from_pos + res.slice_size > params_shape.FlatSize()) { + return kTfLiteError; + } std::memcpy(output_data + i * res.slice_size, params_data + from_pos, sizeof(ParamsT) * res.slice_size); } + return kTfLiteOk; } #ifndef TF_LITE_STATIC_MEMORY @@ -1016,11 +1025,12 @@ inline void GatherNdString(const RuntimeShape& params_shape, #endif template -inline void ScatterNd(const RuntimeShape& indices_shape, - const IndicesT* indices_data, - const RuntimeShape& updates_shape, - const UpdatesT* updates_data, - const RuntimeShape& output_shape, UpdatesT* output_data) { +inline TfLiteStatus ScatterNd(const RuntimeShape& indices_shape, + const IndicesT* indices_data, + const RuntimeShape& updates_shape, + const UpdatesT* updates_data, + const RuntimeShape& output_shape, + UpdatesT* output_data) { ruy::profiler::ScopeLabel label("ScatterNd"); int n_slices = 1; @@ -1043,18 +1053,24 @@ inline void ScatterNd(const RuntimeShape& indices_shape, remain_flat_size = dims_to_count[i]; } + if (n_slices * slice_size > updates_shape.FlatSize()) { + return kTfLiteError; + } memset(output_data, 0, sizeof(UpdatesT) * output_flat_size); for (int i = 0; i < n_slices; ++i) { int to_pos = 0; for (int j = 0; j < indices_nd; ++j) { IndicesT idx = indices_data[i * indices_nd + j]; - TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j)); to_pos += idx * dims_to_count[j]; } + if (to_pos < 0 || to_pos + slice_size > output_flat_size) { + return kTfLiteError; + } for (int j = 0; j < slice_size; j++) { output_data[to_pos + j] += updates_data[i * slice_size + j]; } } + return kTfLiteOk; } template diff --git a/tensorflow/lite/kernels/internal/reference/strided_slice.h b/tensorflow/lite/kernels/internal/reference/strided_slice.h index 40dc2e91022015..ff367cf95f19b6 100644 --- a/tensorflow/lite/kernels/internal/reference/strided_slice.h +++ b/tensorflow/lite/kernels/internal/reference/strided_slice.h @@ -31,10 +31,6 @@ inline void StridedSlice(const tflite::StridedSliceParams& op_params, const RuntimeShape& unextended_input_shape, const RuntimeShape& unextended_output_shape, SequentialTensorWriter* writer) { - using strided_slice::LoopCondition; - using strided_slice::StartForAxis; - using strided_slice::StopForAxis; - ruy::profiler::ScopeLabel label("StridedSlice"); // Note that the output_shape is not used herein. @@ -51,41 +47,67 @@ inline void StridedSlice(const tflite::StridedSliceParams& op_params, // requires (ie. all shapes must be 5D and are given backwards). 
strided_slice::StridedSlicePadIndices(¶ms_copy, 5); - const int start_0 = StartForAxis(params_copy, input_shape, 0); - const int stop_0 = StopForAxis(params_copy, input_shape, 0, start_0); - const int start_1 = StartForAxis(params_copy, input_shape, 1); - const int stop_1 = StopForAxis(params_copy, input_shape, 1, start_1); - const int start_2 = StartForAxis(params_copy, input_shape, 2); - const int stop_2 = StopForAxis(params_copy, input_shape, 2, start_2); - const int start_3 = StartForAxis(params_copy, input_shape, 3); - const int stop_3 = StopForAxis(params_copy, input_shape, 3, start_3); - const int start_4 = StartForAxis(params_copy, input_shape, 4); - const int stop_4 = StopForAxis(params_copy, input_shape, 4, start_4); - - for (int offset_0 = start_0 * input_shape.Dims(1), - end_0 = stop_0 * input_shape.Dims(1), - step_0 = params_copy.strides[0] * input_shape.Dims(1); - !LoopCondition(offset_0, end_0, params_copy.strides[0]); - offset_0 += step_0) { - for (int offset_1 = (offset_0 + start_1) * input_shape.Dims(2), - end_1 = (offset_0 + stop_1) * input_shape.Dims(2), - step_1 = params_copy.strides[1] * input_shape.Dims(2); - !LoopCondition(offset_1, end_1, params_copy.strides[1]); - offset_1 += step_1) { - for (int offset_2 = (offset_1 + start_2) * input_shape.Dims(3), - end_2 = (offset_1 + stop_2) * input_shape.Dims(3), - step_2 = params_copy.strides[2] * input_shape.Dims(3); - !LoopCondition(offset_2, end_2, params_copy.strides[2]); - offset_2 += step_2) { - for (int offset_3 = (offset_2 + start_3) * input_shape.Dims(4), - end_3 = (offset_2 + stop_3) * input_shape.Dims(4), - step_3 = params_copy.strides[3] * input_shape.Dims(4); - !LoopCondition(offset_3, end_3, params_copy.strides[3]); - offset_3 += step_3) { - for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4; - !LoopCondition(offset_4, end_4, params_copy.strides[4]); - offset_4 += params_copy.strides[4]) { - writer->Write(offset_4); + const int start_0 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 0); + const int stop_0 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 0, start_0); + const int start_1 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 1); + const int stop_1 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 1, start_1); + const int start_2 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 2); + const int stop_2 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 2, start_2); + const int start_3 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 3); + const int stop_3 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 3, start_3); + const int start_4 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 4); + const int stop_4 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 4, start_4); + + auto lc = [&](int end, int stride, int index) { + if (stride < 0) { + return index > end; + } else { + return index < end; + } + }; + const int* shape = input_shape.DimsData(); + const int* stride = params_copy.strides; + const bool inner_stride_is_1 = params_copy.strides[4] == 1; + + for (int offset_0 = start_0; lc(stop_0, stride[0], offset_0); + offset_0 += stride[0]) { + for (int offset_1 = start_1; lc(stop_1, stride[1], offset_1); + offset_1 += stride[1]) { + for (int offset_2 = start_2; lc(stop_2, stride[2], offset_2); + offset_2 += stride[2]) { + for (int offset_3 = start_3; lc(stop_3, stride[3], offset_3); + offset_3 
+= stride[3]) { + // When the stride is 1, the inner loop is equivalent to the + // optimized slice inner loop. Otherwise, it is identical to the + // strided_slice reference implementation inner loop. + if (inner_stride_is_1) { + const int len = stop_4 - start_4; + int index = start_4 + offset_3 * shape[4] + + offset_2 * shape[3] * shape[4] + + offset_1 * shape[2] * shape[3] * shape[4] + + offset_0 * shape[1] * shape[2] * shape[3] * shape[4]; + if (len > 0) { + writer->WriteN(index, len); + } + } else { + for (int offset_4 = start_4; lc(stop_4, stride[4], offset_4); + offset_4 += stride[4]) { + int index = offset_4 + offset_3 * shape[4] + + offset_2 * shape[3] * shape[4] + + offset_1 * shape[2] * shape[3] * shape[4] + + offset_0 * shape[1] * shape[2] * shape[3] * shape[4]; + writer->Write(index); + } } } } diff --git a/tensorflow/lite/kernels/internal/strided_slice_logic.h b/tensorflow/lite/kernels/internal/strided_slice_logic.h index bfe84050dca156..2efdcf26fe07a4 100644 --- a/tensorflow/lite/kernels/internal/strided_slice_logic.h +++ b/tensorflow/lite/kernels/internal/strided_slice_logic.h @@ -69,6 +69,69 @@ inline void StridedSlicePadIndices(tflite::StridedSliceParams* p, p->strides_count = dim_count; } +// Return the index for the first element along that axis. This index will be a +// positive integer between [0, axis_size] (or [-1, axis_size -1] if stride < 0) +// that can be used to index directly into the data. +inline int StridedSliceStartForAxis(const tflite::StridedSliceParams& params, + const RuntimeShape& input_shape, + int32_t axis) { + const int32_t axis_size = input_shape.Dims(axis); + int32_t start = params.start_indices[axis]; + const int32_t stride = params.strides[axis]; + const int32_t begin_mask = (params.begin_mask & 1 << axis); + if (start < 0) { + start += axis_size; + } + if (stride > 0) { + start = Clamp(start, 0, axis_size); + } else { + start = Clamp(start, -1, axis_size - 1); + } + if (begin_mask) { + if (stride > 0) { + start = 0; + } else { + start = axis_size - 1; + } + } + return start; +} + +inline int StridedSliceEndForAxis(const tflite::StridedSliceParams& params, + const RuntimeShape& input_shape, int axis, + int start) { + const auto shrink_axis_mask = params.shrink_axis_mask; + const bool shrink_axis = shrink_axis_mask & (1 << axis); + const int axis_size = input_shape.Dims(axis); + if (shrink_axis) { + if (start >= axis_size) { + return start; + } else { + return start + 1; + } + } + const auto* indices = params.stop_indices; + int end = indices[axis]; + const int32_t stride = params.strides[axis]; + const int32_t end_mask = (params.end_mask & 1 << axis); + if (end < 0) { + end += axis_size; + } + if (stride > 0) { + end = Clamp(end, 0, axis_size); + } else { + end = Clamp(end, -1, axis_size - 1); + } + if (end_mask) { + if (stride > 0) { + end = axis_size; + } else { + end = -1; + } + } + return end; +} + // Return the index for the first element along that axis. This index will be a // positive integer between [0, axis_size] (or [-1, axis_size -1] if stride < 0) // that can be used to index directly into the data. 
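The new `StridedSliceStartForAxis`/`StridedSliceEndForAxis` helpers above clamp the begin and end indices into a window that is safe to walk with the given stride. The sketch below condenses the start-index rule into one stand-alone function (`ResolveStart` is an invented name, not the TFLite API); the printed values match the new unit tests that follow, e.g. a begin index of -11 on an axis of size 10 resolves to 0.

```c++
#include <algorithm>
#include <iostream>

// Sketch only: resolve a possibly negative, possibly out-of-range begin index
// for one axis of size axis_size. For a positive stride the result lands in
// [0, axis_size]; for a negative stride it lands in [-1, axis_size - 1], so a
// reverse walk can run until it steps past element 0.
int ResolveStart(int start, int axis_size, int stride, bool begin_mask) {
  if (begin_mask) return stride > 0 ? 0 : axis_size - 1;
  if (start < 0) start += axis_size;          // e.g. -1 means the last element
  const int lo = stride > 0 ? 0 : -1;
  const int hi = stride > 0 ? axis_size : axis_size - 1;
  return std::min(std::max(start, lo), hi);   // clamp into the legal window
}

int main() {
  std::cout << ResolveStart(-11, 10, 1, false) << "\n";  // 0   (clamped)
  std::cout << ResolveStart(-1, 10, 1, false) << "\n";   // 9   (last element)
  std::cout << ResolveStart(11, 10, 1, false) << "\n";   // 10  (one past end)
  std::cout << ResolveStart(5, 4, -1, false) << "\n";    // 3   (reverse walk)
  return 0;
}
```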
diff --git a/tensorflow/lite/kernels/internal/strided_slice_logic_test.cc b/tensorflow/lite/kernels/internal/strided_slice_logic_test.cc index 628e72698917c5..494d07690a2106 100644 --- a/tensorflow/lite/kernels/internal/strided_slice_logic_test.cc +++ b/tensorflow/lite/kernels/internal/strided_slice_logic_test.cc @@ -76,5 +76,119 @@ TEST(RunStridedSlicePadIndices, Pad3) { ); } +TEST(StridedSliceStartForAxis, NegativeOOBIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = -11; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 0); +} + +TEST(StridedSliceStartForAxis, NegativeOneTheBoundaryIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = -10; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 0); +} + +TEST(StridedSliceStartForAxis, NegativeWithinBoundsIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = -9; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 1); +} + +TEST(StridedSliceStartForAxis, MinusOneIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = -1; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 9); +} + +TEST(StridedSliceStartForAxis, ZeroIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = 0; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 0); +} + +TEST(StridedSliceStartForAxis, OneIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = 1; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 1); +} + +TEST(StridedSliceStartForAxis, PositiveBoundaryIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = 9; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 9); +} + +TEST(StridedSliceStartForAxis, PositiveOOBIndexSizeofArray) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = 10; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 10); +} + +TEST(StridedSliceStartForAxis, PositiveOOBIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = 11; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 10); +} + +TEST(StridedSliceStartForAxis, TenFourMinus1) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = 5; + params.stop_indices[0] = 2; + params.strides[0] = -1; + int start = strided_slice::StridedSliceStartForAxis(params, RuntimeShape({4}), + /*axis=*/0); + int stop = 
strided_slice::StridedSliceEndForAxis(params, RuntimeShape({4}), + /*axis=*/0, start); + EXPECT_EQ(start, 3); + EXPECT_EQ(stop, 2); +} + } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/scatter_nd.cc b/tensorflow/lite/kernels/scatter_nd.cc index 93e2fe36c3fb26..144c07a2da7b4f 100644 --- a/tensorflow/lite/kernels/scatter_nd.cc +++ b/tensorflow/lite/kernels/scatter_nd.cc @@ -128,11 +128,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { template TfLiteStatus ScatterNd(const TfLiteTensor* indices, const TfLiteTensor* updates, TfLiteTensor* output) { - reference_ops::ScatterNd( + return reference_ops::ScatterNd( GetTensorShape(indices), GetTensorData(indices), GetTensorShape(updates), GetTensorData(updates), GetTensorShape(output), GetTensorData(output)); - return kTfLiteOk; } template @@ -148,23 +147,36 @@ TfLiteStatus EvalScatterNd(TfLiteContext* context, const TfLiteTensor* indices, ResizeOutputTensor(context, shape, output)); } + TfLiteStatus status = kTfLiteError; switch (updates->type) { case kTfLiteFloat32: - return ScatterNd(indices, updates, output); + status = ScatterNd(indices, updates, output); + break; case kTfLiteUInt8: - return ScatterNd(indices, updates, output); + status = ScatterNd(indices, updates, output); + break; + case kTfLiteBool: + status = ScatterNd(indices, updates, output); + break; case kTfLiteInt8: - return ScatterNd(indices, updates, output); + status = ScatterNd(indices, updates, output); + break; case kTfLiteInt32: - return ScatterNd(indices, updates, output); + status = ScatterNd(indices, updates, output); + break; case kTfLiteInt64: - return ScatterNd(indices, updates, output); + status = ScatterNd(indices, updates, output); + break; default: context->ReportError( context, "Updates of type '%s' are not supported by scatter_nd.", TfLiteTypeGetName(updates->type)); return kTfLiteError; } + if (status != kTfLiteOk) { + context->ReportError(context, "scatter_nd index out of bounds"); + } + return status; } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/kernels/scatter_nd_test.cc b/tensorflow/lite/kernels/scatter_nd_test.cc index 9fdf176fe1f8cd..154596cd4dd90c 100644 --- a/tensorflow/lite/kernels/scatter_nd_test.cc +++ b/tensorflow/lite/kernels/scatter_nd_test.cc @@ -347,5 +347,34 @@ TEST(ScatterNdOpTest, DynamicShape) { /*2, 3*/ 1, 2, 3, 4, 5})); } +TEST(ScatterNdOpTest, ReadAndWriteArrayLimits) { + ScatterNdOpModel m({TensorType_INT32, {5, 1}}, {TensorType_INT32, {5}}, + {TensorType_INT32, {1}}); + m.SetIndices({4, 3, 1, 0, 2}); + m.SetUpdates({1, 2, 3, 7, 9}); + m.SetShape({5}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({5})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({7, 3, 9, 2, 1})); +} + +TEST(ScatterNdOpTest, OOBRead) { + ScatterNdOpModel m({TensorType_INT32, {1, 1}}, {TensorType_INT32, {1}}, + {TensorType_INT32, {1}}); + m.SetIndices({4}); + m.SetUpdates({1}); + m.SetShape({1}); + ASSERT_EQ(m.Invoke(), kTfLiteError); +} + +TEST(ScatterNdOpTest, OOBWrites) { + ScatterNdOpModel m({TensorType_INT32, {5, 1}}, {TensorType_INT32, {5}}, + {TensorType_INT32, {1}}); + m.SetIndices({4, 3, 1, -0x38, 0x38}); + m.SetUpdates({1, 2, 3, 0x44444444, 0x55555555}); + m.SetShape({1}); + ASSERT_EQ(m.Invoke(), kTfLiteError); +} + } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/strided_slice.cc b/tensorflow/lite/kernels/strided_slice.cc index 55aecc9276531e..f6f5d584610b27 100644 --- 
a/tensorflow/lite/kernels/strided_slice.cc +++ b/tensorflow/lite/kernels/strided_slice.cc @@ -24,7 +24,6 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" #include "tensorflow/lite/kernels/internal/strided_slice_logic.h" #include "tensorflow/lite/kernels/internal/tensor.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" @@ -70,7 +69,7 @@ struct StridedSliceContext { }; StridedSliceParams BuildStridedSliceParams(StridedSliceContext* op_context) { - StridedSliceParams op_params; + StridedSliceParams op_params{}; // The ellipsis_mask and new_axis_mask in op_params are not used. Those masks // are processed here to update begin_mask, end_mask and the index range. @@ -196,9 +195,9 @@ TfLiteStatus ResizeOutputTensor(TfLiteContext* context, int32_t stride = op_params.strides[idx]; TF_LITE_ENSURE_MSG(context, stride != 0, "stride value has to be non-zero"); - int32_t begin = ::tflite::strided_slice::StartForAxis( + int32_t begin = ::tflite::strided_slice::StridedSliceStartForAxis( op_params, effective_input_shape, idx); - int32_t end = ::tflite::strided_slice::StopForAxis( + int32_t end = ::tflite::strided_slice::StridedSliceEndForAxis( op_params, effective_input_shape, idx, begin); // When shrinking an axis, the end position does not matter (and can be @@ -272,43 +271,46 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } StridedSliceParams op_params = BuildStridedSliceParams(&op_context); -#define TF_LITE_STRIDED_SLICE(data_type) \ - { \ - if (kernel_type == kGenericOptimized) { \ - optimized_ops::StridedSlice( \ - op_params, op_context.effective_input_shape, op_context.input, \ - GetTensorShape(op_context.output), op_context.output); \ - } else { \ - reference_ops::StridedSlice( \ - op_params, op_context.effective_input_shape, op_context.input, \ - GetTensorShape(op_context.output), op_context.output); \ - } \ - } - switch (op_context.input->type) { case kTfLiteFloat32: - TF_LITE_STRIDED_SLICE(float); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; case kTfLiteInt32: - TF_LITE_STRIDED_SLICE(int32_t); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; case kTfLiteInt64: - TF_LITE_STRIDED_SLICE(int64_t); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; case kTfLiteUInt8: - TF_LITE_STRIDED_SLICE(uint8_t); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; case kTfLiteInt8: - TF_LITE_STRIDED_SLICE(int8_t); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; case kTfLiteInt16: - TF_LITE_STRIDED_SLICE(int16_t); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; case kTfLiteBool: - TF_LITE_STRIDED_SLICE(bool); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + 
GetTensorShape(op_context.output), op_context.output); break; case kTfLiteString: - TF_LITE_STRIDED_SLICE(string); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; default: TF_LITE_KERNEL_LOG(context, diff --git a/tensorflow/lite/kernels/strided_slice_test.cc b/tensorflow/lite/kernels/strided_slice_test.cc index df2bf56cb06fff..359da155db0c78 100644 --- a/tensorflow/lite/kernels/strided_slice_test.cc +++ b/tensorflow/lite/kernels/strided_slice_test.cc @@ -26,6 +26,7 @@ namespace tflite { namespace { using ::testing::ElementsAreArray; +using ::testing::IsEmpty; template class StridedSliceOpModel : public SingleOpModel { @@ -35,7 +36,7 @@ class StridedSliceOpModel : public SingleOpModel { std::initializer_list end_shape, std::initializer_list strides_shape, int begin_mask, int end_mask, int ellipsis_mask, int new_axis_mask, - int shrink_axis_mask) { + int shrink_axis_mask, bool use_simple_allocator = true) { input_ = AddInput(GetTensorType()); begin_ = AddInput(TensorType_INT32); end_ = AddInput(TensorType_INT32); @@ -46,7 +47,8 @@ class StridedSliceOpModel : public SingleOpModel { CreateStridedSliceOptions(builder_, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask) .Union()); - BuildInterpreter({input_shape, begin_shape, end_shape, strides_shape}); + BuildInterpreter({input_shape, begin_shape, end_shape, strides_shape}, + use_simple_allocator); } void SetInput(std::initializer_list data) { @@ -669,7 +671,7 @@ TYPED_TEST(StridedSliceOpTest, In3D_SmallBeginWithhrinkAxis1) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6})); } -TYPED_TEST(StridedSliceOpTest, In3D_BackwardSmallBegin) { +TYPED_TEST(StridedSliceOpTest, In3D_BackwardSmallBeginEndMask) { StridedSliceOpModel m({1, 1, 2}, {1}, {1}, {1}, 0, 1, 0, 0, 0); m.SetInput({1, 2}); m.SetBegin({1}); @@ -679,6 +681,16 @@ TYPED_TEST(StridedSliceOpTest, In3D_BackwardSmallBegin) { EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({0, 1, 2})); } +TYPED_TEST(StridedSliceOpTest, In3D_BackwardSmallBegin) { + StridedSliceOpModel m({1, 1, 2}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + m.SetInput({1, 2}); + m.SetBegin({1}); + m.SetEnd({0}); + m.SetStrides({1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({0, 1, 2})); +} + TYPED_TEST(StridedSliceOpTest, In3D_Backward) { StridedSliceOpModel m({1, 1, 2}, {3}, {3}, {3}, 6, 7, 0, 0, 0); m.SetInput({1, 2}); @@ -853,5 +865,86 @@ TYPED_TEST(StridedSliceOpTest, NoInfiniteLoop) { m.Invoke(); } +TYPED_TEST(StridedSliceOpTest, MinusThreeMinusFourMinusOne) { + StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + m.SetInput({1, 2, 3, 4}); + m.SetBegin({-3}); + m.SetEnd({-4}); + m.SetStrides({-1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({2})); +} + +TYPED_TEST(StridedSliceOpTest, MinusFourMinusThreeOne) { + StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + m.SetInput({1, 2, 3, 4}); + m.SetBegin({-4}); + m.SetEnd({-3}); + m.SetStrides({1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({1})); +} + +TYPED_TEST(StridedSliceOpTest, OneOneOne) { + StridedSliceOpModel m({1}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + m.SetInput({2}); + m.SetBegin({1}); + m.SetEnd({1}); + m.SetStrides({1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + 
EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({0})); +} + +TYPED_TEST(StridedSliceOpTest, OneOneOneShrinkAxis) { + StridedSliceOpModel m({3}, {1}, {1}, {1}, 0, 0, 0, 0, 1); + m.SetInput({1, 2, 3}); + m.SetBegin({1}); + m.SetEnd({1}); + m.SetStrides({1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), IsEmpty()); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({2})); +} + +TYPED_TEST(StridedSliceOpTest, OneOneOneShrinkAxisOOB) { + StridedSliceOpModel m({1}, {1}, {1}, {1}, 0, 0, 0, 0, 1); + m.SetInput({2}); + m.SetBegin({1}); + m.SetEnd({1}); + m.SetStrides({1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), IsEmpty()); +} + +TYPED_TEST(StridedSliceOpTest, OutOfBounds) { + StridedSliceOpModel m({1}, {1}, {1}, {1}, 0, 0, 0, 0, 1); + m.SetBegin({1}); + m.SetEnd({2}); + m.SetStrides({1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), IsEmpty()); +} + +TYPED_TEST(StridedSliceOpTest, StrideOutOfBounds) { + StridedSliceOpModel m({1}, {1}, {1}, {1}, 0, 0, 0, 0, 1); + m.SetBegin({1}); + m.SetEnd({4}); + m.SetStrides({7}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), IsEmpty()); +} + +TYPED_TEST(StridedSliceOpTest, NegEndMask) { + StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 0, 0b10, 0, 0, 0); + m.SetInput({1, 2, 3, 4, 5, 6}); + m.SetBegin({0, -1}); + m.SetEnd({2, -3}); + m.SetStrides({1, -1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 3})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 2, 1, 6, 5, 4})); +} } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/test_util.cc b/tensorflow/lite/kernels/test_util.cc index 0c6e410205fc3e..4084193ab48eb1 100644 --- a/tensorflow/lite/kernels/test_util.cc +++ b/tensorflow/lite/kernels/test_util.cc @@ -177,7 +177,13 @@ void SingleOpModel::BuildInterpreter(std::vector> input_shapes, int num_threads, bool allow_fp32_relax_to_fp16, bool apply_delegate, - bool allocate_and_delegate) { + bool allocate_and_delegate, + bool use_simple_allocator) { + input_shapes_ = input_shapes; + allow_fp32_relax_to_fp16_ = allow_fp32_relax_to_fp16; + apply_delegate_ = apply_delegate; + allocate_and_delegate_ = allocate_and_delegate; + auto opcodes = builder_.CreateVector(opcodes_); auto operators = builder_.CreateVector(operators_); auto tensors = builder_.CreateVector(tensors_); @@ -197,7 +203,7 @@ void SingleOpModel::BuildInterpreter(std::vector> input_shapes, uint8_t* buffer_pointer = builder_.GetBufferPointer(); UpdateOpVersion(buffer_pointer); - bool use_simple_allocator = + use_simple_allocator |= tflite::KernelTestDelegateProviders::Get()->ConstParams().Get( tflite::KernelTestDelegateProviders::kUseSimpleAllocator); @@ -284,11 +290,12 @@ void SingleOpModel::Invoke() { ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk); } TfLiteStatus SingleOpModel::InvokeUnchecked() { return interpreter_->Invoke(); } -void SingleOpModel::BuildInterpreter( - std::vector> input_shapes) { +void SingleOpModel::BuildInterpreter(std::vector> input_shapes, + bool use_simple_allocator) { BuildInterpreter(input_shapes, /*num_threads=*/-1, /*allow_fp32_relax_to_fp16=*/false, - /*apply_delegate=*/true, /*allocate_and_delegate=*/true); + /*apply_delegate=*/true, /*allocate_and_delegate=*/true, + use_simple_allocator); } // static diff --git a/tensorflow/lite/kernels/test_util.h b/tensorflow/lite/kernels/test_util.h index 65e49286d51758..f2cee471a411a9 100644 --- a/tensorflow/lite/kernels/test_util.h +++ b/tensorflow/lite/kernels/test_util.h 
@@ -498,9 +498,11 @@ class SingleOpModel { // `apply_delegate` is ignored. void BuildInterpreter(std::vector> input_shapes, int num_threads, bool allow_fp32_relax_to_fp16, - bool apply_delegate, bool allocate_and_delegate = true); + bool apply_delegate, bool allocate_and_delegate = true, + bool use_simple_allocator = false); - void BuildInterpreter(std::vector> input_shapes); + void BuildInterpreter(std::vector> input_shapes, + bool use_simple_allocator = false); // Executes inference, asserting success. void Invoke(); diff --git a/tensorflow/lite/python/interpreter_wrapper/numpy.cc b/tensorflow/lite/python/interpreter_wrapper/numpy.cc index 5fabf660e2e1a9..452d7da60a7971 100644 --- a/tensorflow/lite/python/interpreter_wrapper/numpy.cc +++ b/tensorflow/lite/python/interpreter_wrapper/numpy.cc @@ -162,6 +162,13 @@ bool FillStringBufferFromPyString(PyObject* value, bool FillStringBufferWithPyArray(PyObject* value, DynamicBuffer* dynamic_buffer) { + if (!PyArray_Check(value)) { + PyErr_Format(PyExc_ValueError, + "Passed in value type is not a numpy array, got type %s.", + value->ob_type->tp_name); + return false; + } + PyArrayObject* array = reinterpret_cast(value); switch (PyArray_TYPE(array)) { case NPY_OBJECT: diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index 4abd0507dddbb9..ad928018520292 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -136,6 +136,35 @@ def testScalarInput(self): actual_value = self._evaluateTFLiteModel(tflite_model, [input_data]) self.assertEqual(expected_value.numpy(), actual_value) + @test_util.run_v2_only + def testStringInput(self): + + class Model(tf.Module): + + @tf.function + def __call__(self, x): + return x + + root = Model() + concrete_func = root.__call__.get_concrete_function( + tf.constant([str(x) for x in range(11)])) + # Convert model. + converter = lite.TFLiteConverterV2.from_concrete_functions([concrete_func], + root) + tflite_model = converter.convert() + input_data = tf.constant([str(x) for x in range(11)], + shape=(11,), + dtype=tf.dtypes.string) + # Check values from converted model. 
+ interpreter = tf.lite.Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + my_signature = interpreter.get_signature_runner() + + with self.assertRaises(ValueError) as error: + _ = my_signature(x=input_data) + self.assertIn('Passed in value type is not a numpy array, got type ', + str(error.exception)) + @test_util.run_v2_only def testModelWithoutInputs(self): diff --git a/tensorflow/lite/tools/pip_package/Dockerfile b/tensorflow/lite/tools/pip_package/Dockerfile deleted file mode 100644 index 9059f81cc85bcf..00000000000000 --- a/tensorflow/lite/tools/pip_package/Dockerfile +++ /dev/null @@ -1,46 +0,0 @@ -ARG IMAGE -FROM ${IMAGE} - -COPY update_sources.sh / -RUN /update_sources.sh - -RUN dpkg --add-architecture armhf -RUN dpkg --add-architecture arm64 -RUN apt-get update && \ - apt-get install -y \ - debhelper \ - dh-python \ - python-all \ - python-setuptools \ - python-wheel \ - python-numpy \ - python-pip \ - pybind11-dev \ - libpython-dev \ - libpython-dev:armhf \ - libpython-dev:arm64 \ - python3-all \ - python3-setuptools \ - python3-wheel \ - python3-numpy \ - python3-pip \ - libpython3-dev \ - libpython3-dev:armhf \ - libpython3-dev:arm64 \ - crossbuild-essential-armhf \ - crossbuild-essential-arm64 \ - zlib1g-dev \ - zlib1g-dev:armhf \ - zlib1g-dev:arm64 \ - curl \ - unzip \ - git && \ - apt-get clean -RUN pip install pip --upgrade -RUN pip install pybind11 -RUN pip3 install pip --upgrade -RUN pip3 install pybind11 -RUN curl -OL https://github.com/Kitware/CMake/releases/download/v3.16.8/cmake-3.16.8-Linux-x86_64.sh -RUN mkdir /opt/cmake -RUN sh cmake-3.16.8-Linux-x86_64.sh --prefix=/opt/cmake --skip-license -RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake diff --git a/tensorflow/lite/tools/pip_package/Dockerfile.py3 b/tensorflow/lite/tools/pip_package/Dockerfile.py3 index da34f2d39ce242..664f713e4ead44 100644 --- a/tensorflow/lite/tools/pip_package/Dockerfile.py3 +++ b/tensorflow/lite/tools/pip_package/Dockerfile.py3 @@ -1,3 +1,17 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + ARG IMAGE FROM ${IMAGE} ARG PYTHON_VERSION @@ -5,27 +19,26 @@ ARG PYTHON_VERSION COPY update_sources.sh / RUN /update_sources.sh -RUN dpkg --add-architecture armhf -RUN dpkg --add-architecture arm64 RUN apt-get update && \ apt-get install -y \ + build-essential \ software-properties-common \ - debhelper \ - crossbuild-essential-armhf \ - crossbuild-essential-arm64 \ zlib1g-dev \ - zlib1g-dev:armhf \ - zlib1g-dev:arm64 \ curl \ unzip \ git && \ apt-get clean +RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata +# Install Python packages. 
+RUN dpkg --add-architecture armhf +RUN dpkg --add-architecture arm64 RUN yes | add-apt-repository ppa:deadsnakes/ppa RUN apt-get update && \ apt-get install -y \ python$PYTHON_VERSION \ python$PYTHON_VERSION-dev \ + python$PYTHON_VERSION-venv \ python$PYTHON_VERSION-distutils \ libpython$PYTHON_VERSION-dev \ libpython$PYTHON_VERSION-dev:armhf \ @@ -42,3 +55,8 @@ RUN curl -OL https://github.com/Kitware/CMake/releases/download/v3.16.8/cmake-3. RUN mkdir /opt/cmake RUN sh cmake-3.16.8-Linux-x86_64.sh --prefix=/opt/cmake --skip-license RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake + +ENV CI_BUILD_PYTHON=python$PYTHON_VERSION +ENV CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python$PYTHON_VERSION + +COPY with_the_same_user / diff --git a/tensorflow/lite/tools/pip_package/Makefile b/tensorflow/lite/tools/pip_package/Makefile index 4cec686858c2b8..aed48aca5383f2 100644 --- a/tensorflow/lite/tools/pip_package/Makefile +++ b/tensorflow/lite/tools/pip_package/Makefile @@ -1,19 +1,45 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Values: debian:, ubuntu: -BASE_IMAGE ?= debian:buster -# Values: python, python3 -PYTHON ?= python3 +BASE_IMAGE ?= ubuntu:18.04 +PYTHON_VERSION ?= 3.9 # Values: rpi, aarch64, native TENSORFLOW_TARGET ?= native -# Values: n, y -BUILD_DEB ?= n +WHEEL_PROJECT_NAME ?= tflite_runtime # Values: according to https://www.python.org/dev/peps/pep-0440/ VERSION_SUFFIX ?= MAKEFILE_DIR := $(realpath $(dir $(lastword $(MAKEFILE_LIST)))) TENSORFLOW_DIR := $(MAKEFILE_DIR)/../../../.. -OUT_DIR := $(CURDIR)/out/$(PYTHON)/$(subst :,-,$(BASE_IMAGE)) TAG_IMAGE := "tflite-runtime-builder-$(subst :,-,$(BASE_IMAGE))" +DOCKER_PARAMS := --pid=host \ + --env "CI_BUILD_USER=$(shell id -u -n)" \ + --env "CI_BUILD_UID=$(shell id -u)" \ + --env "CI_BUILD_GROUP=$(shell id -g -n)" \ + --env "CI_BUILD_GID=$(shell id -g)" \ + --env "CI_BUILD_HOME=$(TENSORFLOW_DIR)/bazel-ci_build-cache" \ + --env "WHEEL_PROJECT_NAME=$(WHEEL_PROJECT_NAME)" \ + --env "VERSION_SUFFIX=$(VERSION_SUFFIX)" \ + --volume $(TENSORFLOW_DIR):/tensorflow \ + --workdir /tensorflow + +ifneq ($(WHEEL_PLATFORM_NAME),) + DOCKER_PARAMS += --env WHEEL_PLATFORM_NAME=$(WHEEL_PLATFORM_NAME) +endif + .PHONY: help \ docker-image \ docker-shell \ @@ -27,33 +53,18 @@ help: @echo "make clean -- remove wheel and deb files" docker-image: -ifeq ($(BASE_IMAGE),ubuntu:16.04) - docker build -t $(TAG_IMAGE) --build-arg IMAGE=$(BASE_IMAGE) --build-arg PYTHON_VERSION=3.8 -f Dockerfile.py3 . -else - docker build -t $(TAG_IMAGE) --build-arg IMAGE=$(BASE_IMAGE) . -endif + docker build -t $(TAG_IMAGE) --build-arg IMAGE=$(BASE_IMAGE) --build-arg PYTHON_VERSION=$(PYTHON_VERSION) -f $(MAKEFILE_DIR)/Dockerfile.py3 $(MAKEFILE_DIR)/. 
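With the variables reworked above, a typical invocation is now something like "make docker-build PYTHON_VERSION=3.9 TENSORFLOW_TARGET=aarch64" (a sketch built from the defaults shown, not the only supported form): the image is always built from Dockerfile.py3, the docker-shell and docker-build targets that follow re-run the build inside the container as the invoking user via the /with_the_same_user wrapper added further down in this change, and DOCKER_PARAMS forwards WHEEL_PROJECT_NAME (default tflite_runtime) and VERSION_SUFFIX into the container, where the build scripts export WHEEL_PROJECT_NAME as PROJECT_NAME.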
docker-shell: docker-image + mkdir -p $(TENSORFLOW_DIR)/bazel-ci_build-cache docker run --rm --interactive --tty \ - --volume $(TENSORFLOW_DIR):/tensorflow \ - --workdir /tensorflow \ - $(TAG_IMAGE) + $(DOCKER_PARAMS) \ + $(TAG_IMAGE) /with_the_same_user /bin/bash docker-build: docker-image - mkdir -p $(OUT_DIR) - docker run --user $(shell id -u):$(shell id -g) \ + mkdir -p $(TENSORFLOW_DIR)/bazel-ci_build-cache + docker run \ --rm --interactive $(shell tty -s && echo --tty) \ - --env "PYTHON=$(PYTHON)" \ - --env "TENSORFLOW_TARGET=$(TENSORFLOW_TARGET)" \ - --env "BUILD_DEB=$(BUILD_DEB)" \ - --env "VERSION_SUFFIX=$(VERSION_SUFFIX)" \ - --volume $(TENSORFLOW_DIR):/tensorflow \ - --volume $(OUT_DIR):/out \ + $(DOCKER_PARAMS) \ $(TAG_IMAGE) \ - /bin/bash -c "/tensorflow/tensorflow/lite/tools/pip_package/build_pip_package_with_cmake.sh && \ - (cp /tensorflow/tensorflow/lite/tools/pip_package/gen/tflite_pip/*.deb \ - /tensorflow/tensorflow/lite/tools/pip_package/gen/tflite_pip/${PYTHON}/dist/{*.whl,*.tar.gz} \ - /out 2>/dev/null || true)" - -clean: - rm -rf $(CURDIR)/out \ No newline at end of file + /with_the_same_user /bin/bash -C /tensorflow/tensorflow/lite/tools/pip_package/build_pip_package_with_cmake.sh $(TENSORFLOW_TARGET) diff --git a/tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh b/tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh index cafb8540264866..9026c3db2c7dbe 100755 --- a/tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh +++ b/tensorflow/lite/tools/pip_package/build_pip_package_with_bazel.sh @@ -22,6 +22,7 @@ export TENSORFLOW_DIR="${SCRIPT_DIR}/../../../.." TENSORFLOW_LITE_DIR="${TENSORFLOW_DIR}/tensorflow/lite" TENSORFLOW_VERSION=$(grep "_VERSION = " "${TENSORFLOW_DIR}/tensorflow/tools/pip_package/setup.py" | cut -d= -f2 | sed "s/[ '-]//g") export PACKAGE_VERSION="${TENSORFLOW_VERSION}${VERSION_SUFFIX}" +export PROJECT_NAME=${WHEEL_PROJECT_NAME:-tflite_runtime} BUILD_DIR="${SCRIPT_DIR}/gen/tflite_pip/${PYTHON}" TENSORFLOW_TARGET=${TENSORFLOW_TARGET:-$1} if [ "${TENSORFLOW_TARGET}" = "rpi" ]; then diff --git a/tensorflow/lite/tools/pip_package/build_pip_package_with_cmake.sh b/tensorflow/lite/tools/pip_package/build_pip_package_with_cmake.sh index 35eb3a917fb921..f20765164ead82 100755 --- a/tensorflow/lite/tools/pip_package/build_pip_package_with_cmake.sh +++ b/tensorflow/lite/tools/pip_package/build_pip_package_with_cmake.sh @@ -22,6 +22,7 @@ export TENSORFLOW_DIR="${SCRIPT_DIR}/../../../.." 
TENSORFLOW_LITE_DIR="${TENSORFLOW_DIR}/tensorflow/lite" TENSORFLOW_VERSION=$(grep "_VERSION = " "${TENSORFLOW_DIR}/tensorflow/tools/pip_package/setup.py" | cut -d= -f2 | sed "s/[ '-]//g") export PACKAGE_VERSION="${TENSORFLOW_VERSION}${VERSION_SUFFIX}" +export PROJECT_NAME=${WHEEL_PROJECT_NAME:-tflite_runtime} BUILD_DIR="${SCRIPT_DIR}/gen/tflite_pip/${PYTHON}" TENSORFLOW_TARGET=${TENSORFLOW_TARGET:-$1} if [ "${TENSORFLOW_TARGET}" = "rpi" ]; then diff --git a/tensorflow/lite/tools/pip_package/setup_with_binary.py b/tensorflow/lite/tools/pip_package/setup_with_binary.py index 52d8fd4c96dbe1..58f197e773c963 100644 --- a/tensorflow/lite/tools/pip_package/setup_with_binary.py +++ b/tensorflow/lite/tools/pip_package/setup_with_binary.py @@ -24,7 +24,7 @@ from setuptools import find_packages from setuptools import setup -PACKAGE_NAME = 'tflite_runtime' +PACKAGE_NAME = os.environ['PROJECT_NAME'] PACKAGE_VERSION = os.environ['PACKAGE_VERSION'] DOCLINES = __doc__.split('\n') @@ -47,11 +47,9 @@ 'Intended Audience :: Science/Research', 'License :: OSI Approved :: Apache Software License', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Topic :: Scientific/Engineering', 'Topic :: Scientific/Engineering :: Mathematics', 'Topic :: Scientific/Engineering :: Artificial Intelligence', diff --git a/tensorflow/lite/tools/pip_package/with_the_same_user b/tensorflow/lite/tools/pip_package/with_the_same_user new file mode 100755 index 00000000000000..dd64195739d16b --- /dev/null +++ b/tensorflow/lite/tools/pip_package/with_the_same_user @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# This script is a wrapper creating the same user inside container as the one +# running the ci_build.sh outside the container. It also set the home directory +# for the user inside container to match the same absolute path as the workspace +# outside of container. +# We do this so that the bazel running inside container generate symbolic links +# and user permissions which makes sense outside of container. +# Do not run this manually. It does not make sense. It is intended to be called +# by ci_build.sh only. + +set -e + +COMMAND=("$@") + +if ! touch /this_is_writable_file_system; then + echo "You can't write to your filesystem!" + echo "If you are in Docker you should check you do not have too many images" \ + "with too many files in them. Docker has some issue with it." 
+ exit 1 +else + rm /this_is_writable_file_system +fi + +if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then + ADDUSER_OPTS="--force-badname" +fi + +apt-get install sudo + +getent group "${CI_BUILD_GID}" || addgroup ${ADDUSER_OPTS} --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" +getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \ + --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \ + --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \ + --disabled-password --home "${CI_BUILD_HOME}" --quiet "${CI_BUILD_USER}" +usermod -a -G sudo "${CI_BUILD_USER}" +echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo + +if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then + # ROCm requires the video group in order to use the GPU for compute. If it + # exists on the host, add it to the container. + getent group video || addgroup video && adduser "${CI_BUILD_USER}" video +fi + +if [ -e /root/.bazelrc ]; then + cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc" + chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc" +fi + +sudo -u "#${CI_BUILD_UID}" --preserve-env "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" \ +"HOME=${CI_BUILD_HOME}" ${COMMAND[@]} diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD index 8b5d299af21371..b7952f3dfdade6 100644 --- a/tensorflow/python/data/experimental/kernel_tests/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/BUILD @@ -195,6 +195,7 @@ tf_py_test( size = "small", srcs = ["group_by_reducer_test.py"], shard_count = 12, + tags = ["no_oss"], # TODO(b/258503209): Disable the test. deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index b8e3fa6ac805c4..c65dadc581dd51 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -70,6 +70,7 @@ tf_py_test( srcs = ["cache_test.py"], tags = [ "notsan", # TODO(b/206452257): re-enable after flakiness resolved. + "no_oss", ], deps = [ ":checkpoint_test_base", @@ -538,6 +539,7 @@ tf_py_test( name = "map_test", size = "small", srcs = ["map_test.py"], + tags = ["no_oss"], shard_count = 19, deps = [ ":checkpoint_test_base", @@ -890,6 +892,7 @@ tf_py_test( "no_tsan", # TODO(b/191433147): reenable "no_windows", # TODO(b/182379890) "notap", # TODO(b/192359227) + "no_oss", # TODO(b/258503209): Disable the test. 
], deps = [ ":checkpoint_test_base", @@ -991,6 +994,7 @@ tf_py_test( name = "tf_record_dataset_test", size = "small", srcs = ["tf_record_dataset_test.py"], + tags = ["no_oss"], shard_count = 8, deps = [ ":checkpoint_test_base", diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index fc48d07218ecc6..80a4a6c0d124e6 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -256,6 +256,13 @@ PARSE_VALUE(ParseFloatValue, float, PyFloat_Check, PyFloat_AsDouble) #if PY_MAJOR_VERSION < 3 bool ParseInt64Value(const string& key, PyObject* py_value, TF_Status* status, int64_t* value) { + if (py_value == nullptr) { + TF_SetStatus(status, TF_INVALID_ARGUMENT, + tensorflow::strings::StrCat( + "Expecting int or long value for attr ", key, ".")) + .c_str(); + return false; + } if (PyInt_Check(py_value)) { *value = static_cast(PyInt_AsLong(py_value)); return true; @@ -389,11 +396,24 @@ bool SetOpAttrList(TFE_Context* ctx, TFE_Op* op, const char* key, const int num_values = PySequence_Size(py_list); if (attr_list_sizes != nullptr) (*attr_list_sizes)[key] = num_values; -#define PARSE_LIST(c_type, parse_fn) \ - std::unique_ptr values(new c_type[num_values]); \ - for (int i = 0; i < num_values; ++i) { \ - tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i)); \ - if (!parse_fn(key, py_value.get(), status, &values[i])) return false; \ +#define SEQUENCE_ITEM_NULL_CHECK(c_type, item) \ + if (!item) { \ + TF_SetStatus(status, TF_INVALID_ARGUMENT, \ + tensorflow::strings::StrCat( \ + "Expecting sequence of " #c_type " for attr ", key, \ + ", got ", py_list->ob_type->tp_name) \ + .c_str()); \ + return false; \ + } + +#define PARSE_LIST(c_type, parse_fn) \ + std::unique_ptr values(new c_type[num_values]); \ + for (int i = 0; i < num_values; ++i) { \ + tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i)); \ + SEQUENCE_ITEM_NULL_CHECK(c_type, py_value); \ + if (!parse_fn(key, py_value.get(), status, &values[i])) { \ + return false; \ + } \ } if (type == TF_ATTR_STRING) { @@ -402,6 +422,7 @@ bool SetOpAttrList(TFE_Context* ctx, TFE_Op* op, const char* key, for (int i = 0; i < num_values; ++i) { tensorflow::StringPiece value; tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i)); + SEQUENCE_ITEM_NULL_CHECK(string, py_value); if (!ParseStringValue(key, py_value.get(), status, &value)) return false; values[i] = value.data(); lengths[i] = value.size(); @@ -680,9 +701,12 @@ bool SetOpAttrScalar(TFE_Context* ctx, TFE_Op* op, const char* key, for (int i = 0; i < num_dims; ++i) { tensorflow::Safe_PyObjectPtr inner_py_value( PySequence_ITEM(py_value, i)); + // If an error is generated when iterating through object, we can + // sometimes get a nullptr. 
if (inner_py_value.get() == Py_None) { dims[i] = -1; - } else if (!ParseDimensionValue(key, inner_py_value.get(), status, + } else if (inner_py_value.get() == nullptr || + !ParseDimensionValue(key, inner_py_value.get(), status, &dims[i])) { return false; } diff --git a/tensorflow/python/eager/tensor_test.py b/tensorflow/python/eager/tensor_test.py index befa08e34bf1aa..6592c408640e6c 100644 --- a/tensorflow/python/eager/tensor_test.py +++ b/tensorflow/python/eager/tensor_test.py @@ -475,6 +475,24 @@ def testEagerTensorFormatForVariant(self): self.assertEqual( f"{t!r}", ">") + def testNumpyTooManyDimensions(self): + t = constant_op.constant(1., shape=[1] * 33) + with self.assertRaisesRegex( + errors.InvalidArgumentError, + "Cannot convert tensor with 33 dimensions to NumPy array. NumPy arrays " + "can have at most 32 dimensions"): + t.numpy() + + def testNumpyDimsTooBig(self): + # Creating a Numpy array fails in some cases if the product of non-zero + # dimensions is very big, even if the shape also has a zero in it. + t = array_ops.ones((0, 2**31, 2**31)) + with self.assertRaisesRegex( + errors.InvalidArgumentError, + r"Failed to create numpy array from tensor of shape " + r"\[0, 2147483648, 2147483648\]. Numpy error.*array is too big"): + t.numpy() + class TFETensorUtilTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/framework/BUILD b/tensorflow/python/framework/BUILD index b5f30d3fb2ebd9..e8485d6ccdb92e 100644 --- a/tensorflow/python/framework/BUILD +++ b/tensorflow/python/framework/BUILD @@ -1568,7 +1568,10 @@ tf_py_test( srcs = ["importer_test.py"], main = "importer_test.py", python_version = "PY3", - tags = ["no_rocm"], + tags = [ + "no_oss", + "no_rocm", + ], deps = [ ":for_generated_wrappers", ":framework", diff --git a/tensorflow/python/keras/utils/BUILD b/tensorflow/python/keras/utils/BUILD index af9568fd1d76ae..bb3fec9cbc6a76 100644 --- a/tensorflow/python/keras/utils/BUILD +++ b/tensorflow/python/keras/utils/BUILD @@ -446,6 +446,7 @@ tf_py_test( size = "small", srcs = ["conv_utils_test.py"], python_version = "PY3", + tags = ["no_oss"], # TODO(b/258503209): Disable the test. deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python/keras", diff --git a/tensorflow/python/kernel_tests/array_ops/BUILD b/tensorflow/python/kernel_tests/array_ops/BUILD index 1219203462a204..1586623703e8e7 100644 --- a/tensorflow/python/kernel_tests/array_ops/BUILD +++ b/tensorflow/python/kernel_tests/array_ops/BUILD @@ -19,6 +19,7 @@ cuda_py_test( tags = [ "noasan", # times out "optonly", # times out + "no_oss", # TODO(b/258503209): Disable the test. ], deps = [ "//tensorflow/python:array_ops", @@ -140,7 +141,10 @@ cuda_py_test( name = "concat_op_test", size = "medium", srcs = ["concat_op_test.py"], - tags = ["no_windows"], # b/126916429 + tags = [ + "no_windows", # b/126916429 + "no_oss", # TODO(b/258503209): Disable the test. + ], xla_tags = [ "no_cuda_asan", # times out ], @@ -447,7 +451,10 @@ cuda_py_test( name = "pad_op_test", size = "small", srcs = ["pad_op_test.py"], - tags = ["no_mac"], # test is times out on mac b/186262388 + tags = [ + "no_mac", # test is times out on mac b/186262388 + "no_oss", # TODO(b/258503209): Disable the test. + ], xla_tags = [ "no_cuda_asan", # times out ], @@ -560,6 +567,7 @@ cuda_py_test( name = "slice_op_test", size = "medium", srcs = ["slice_op_test.py"], + tags = ["no_oss"], # TODO(b/258503209): Disable the test. 
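The 32-dimension cap asserted in testNumpyTooManyDimensions above comes from NumPy itself (NPY_MAXDIMS is 32 in the NumPy 1.x releases TensorFlow builds against), not from TensorFlow. A minimal sketch showing the same limit without any TensorFlow involvement:

import numpy as np

# Under NumPy 1.x this raises ValueError ("maximum supported dimension for an
# ndarray is 32, found 33" or similar), the same cap EagerTensor.numpy() now reports.
try:
    np.zeros([1] * 33)
except ValueError as e:
    print(e)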
deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -609,6 +617,7 @@ cuda_py_test( name = "split_op_test", size = "medium", srcs = ["split_op_test.py"], + tags = ["no_oss"], # TODO(b/258503209): Disable the test. deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/array_ops/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops/array_ops_test.py index 2ab760aba36bd1..8a0a3275fb654e 100644 --- a/tensorflow/python/kernel_tests/array_ops/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops/array_ops_test.py @@ -351,6 +351,15 @@ def testExpandDimsWithNonScalarDim(self): "must be a tensor with a single value"): array_ops.expand_dims(1, axis=[0, 1]) + def testReshapeWithManyDims(self): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "too many dimensions"): + self.evaluate( + array_ops.reshape( + tensor=[[1]], + shape=constant_op.constant([1 for i in range(254)], + dtype=dtypes.int64))) + @test_util.with_eager_op_as_function class ReverseV2Test(test_util.TensorFlowTestCase): @@ -1513,6 +1522,21 @@ def testEager(self): [[0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 2, 3, 0, 0], [0, 0, 4, 5, 6, 0, 0], [0, 0, 0, 0, 0, 0, 0]]) + # b/246325518: Bad shape size. Explicitly testing different execution paths. + def testInvalidMirrorPadGradEagerMode(self): + with context.eager_mode(): + with self.assertRaises(Exception): + gen_array_ops.MirrorPadGrad( + input=[1], paddings=[[0x77f00000, 0xa000000]], mode="REFLECT") + + # b/246325518: Bad shape size. Explicitly testing different execution paths. + def testInvalidMirrorPadGradGraphMode(self): + with context.graph_mode(): + with self.assertRaises(Exception): + result = gen_array_ops.MirrorPadGrad( + input=[1], paddings=[[0x77f00000, 0xa000000]], mode="REFLECT") + self.evaluate(result) + def testSymmetricMirrorPadGrad(self): t = np.broadcast_to(np.arange(0, 7), (3, 2, 1, 7)) paddings = constant_op.constant([ @@ -1736,6 +1760,72 @@ def testOutOfBoundAxis(self): max_range=input_max, axis=2**31 - 1)) + @test_util.run_v2_only + def testInvalidAxis(self): + + @def_function.function + def test_quantize_and_dequantize_v2(): + gen_array_ops.quantize_and_dequantize_v2( + input=[2.5], + input_min=[1.0], + input_max=[10.0], + signed_input=True, + num_bits=1, + range_given=True, + round_mode="HALF_TO_EVEN", + narrow_range=True, + axis=0x7fffffff) + + @def_function.function + def test_quantize_and_dequantize_v3(): + gen_array_ops.quantize_and_dequantize_v3( + input=[2.5], + input_min=[1.0], + input_max=[10.0], + num_bits=1, + signed_input=True, + range_given=True, + narrow_range=True, + axis=0x7fffffff) + + @def_function.function + def test_quantize_and_dequantize_v4(): + gen_array_ops.quantize_and_dequantize_v4( + input=[2.5], + input_min=[1.0], + input_max=[10.0], + signed_input=True, + num_bits=1, + range_given=True, + round_mode="HALF_TO_EVEN", + narrow_range=True, + axis=0x7fffffff) + + @def_function.function + def test_quantize_and_dequantize_v4_grad(): + gen_array_ops.quantize_and_dequantize_v4_grad( + gradients=[2.5], + input=[2.5], + input_min=[1.0], + input_max=[10.0], + axis=0x7fffffff) + + with self.assertRaisesRegex( + ValueError, "Axis cannot be >= kint32max value, got 2147483647"): + test_quantize_and_dequantize_v2() + + with self.assertRaisesRegex( + ValueError, "Axis cannot be >= kint32max value, got 2147483647"): + test_quantize_and_dequantize_v3() + + with self.assertRaisesRegex( + ValueError, "Axis cannot be >= kint32max value, got 
2147483647"): + test_quantize_and_dequantize_v4() + + with self.assertRaisesRegex( + ValueError, "Axis cannot be >= kint32max value, got 2147483647"): + test_quantize_and_dequantize_v4_grad() + @test_util.run_all_in_graph_and_eager_modes class SortedSearchTest(test_util.TensorFlowTestCase): @@ -1956,6 +2046,17 @@ def testZeroValueSize(self): side=side, out_type=dtype), array_ops.zeros([2, 0], dtype)) + def testZeroInputSize(self): + dtype = dtypes.int32 + for side in ("left", "right"): + with self.subTest(side=side): + self.assertAllEqual( + array_ops.searchsorted( + array_ops.ones([2, 0]), + array_ops.ones([2, 3]), + side=side, + out_type=dtype), array_ops.zeros([2, 3], dtype)) + def testInt64(self): @def_function.function diff --git a/tensorflow/python/kernel_tests/array_ops/edit_distance_op_test.py b/tensorflow/python/kernel_tests/array_ops/edit_distance_op_test.py index 9996a4f621e4bf..c3720efa6bd305 100644 --- a/tensorflow/python/kernel_tests/array_ops/edit_distance_op_test.py +++ b/tensorflow/python/kernel_tests/array_ops/edit_distance_op_test.py @@ -207,6 +207,24 @@ def testEditDistanceZeroLengthHypothesisAndTruth(self): normalize=True, expected_output=expected_output) + def testEditDistanceBadIndices(self): + hypothesis_indices = np.full((3, 3), -1250999896764, dtype=np.int64) + hypothesis_values = np.zeros(3, dtype=np.int64) + hypothesis_shape = np.zeros(3, dtype=np.int64) + truth_indices = np.full((3, 3), -1250999896764, dtype=np.int64) + truth_values = np.full([3], 2, dtype=np.int64) + truth_shape = np.full([3], 2, dtype=np.int64) + expected_output = [] # dummy; ignored + + self._testEditDistance( + hypothesis=(hypothesis_indices, hypothesis_values, hypothesis_shape), + truth=(truth_indices, truth_values, truth_shape), + normalize=False, + expected_output=expected_output, + expected_err_re=(r"inner product -\d+ which would require writing " + "to outside of the buffer for the output tensor") + ) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/array_ops/spacetobatch_op_test.py b/tensorflow/python/kernel_tests/array_ops/spacetobatch_op_test.py index a095aced262ecd..5e682364837e55 100644 --- a/tensorflow/python/kernel_tests/array_ops/spacetobatch_op_test.py +++ b/tensorflow/python/kernel_tests/array_ops/spacetobatch_op_test.py @@ -16,7 +16,9 @@ import numpy as np +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.framework import test_util @@ -516,6 +518,27 @@ def testUnknown(self): dtypes.float32, shape=(3, 2, 3, 2)), [2, 3], [[1, 1], [0, 0]]) self.assertEqual([3 * 2 * 3, 2, 1, 2], t.get_shape().as_list()) + @test_util.run_in_graph_and_eager_modes + def testInvalidBlockShape(self): + tf_in = constant_op.constant( + -3.5e+35, shape=[10, 20, 20], dtype=dtypes.float32) + block_shape = constant_op.constant(-10, shape=[2], dtype=dtypes.int64) + paddings = constant_op.constant(0, shape=[2, 2], dtype=dtypes.int32) + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "block_shape must be positive"): + array_ops.space_to_batch_nd(tf_in, block_shape, paddings) + + @test_util.run_in_graph_and_eager_modes + def testOutputSizeOutOfBounds(self): + tf_in = constant_op.constant( + -3.5e+35, shape=[10, 19, 22], dtype=dtypes.float32) + block_shape = constant_op.constant( + 1879048192, shape=[2], dtype=dtypes.int64) + paddings = 
constant_op.constant(0, shape=[2, 2], dtype=dtypes.int32) + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Negative.* dimension size caused by overflow"): + array_ops.space_to_batch_nd(tf_in, block_shape, paddings) + class SpaceToBatchGradientTest(test.TestCase, PythonOpImpl): diff --git a/tensorflow/python/kernel_tests/composite_tensor_ops_test.py b/tensorflow/python/kernel_tests/composite_tensor_ops_test.py index e5e9d1ef9bf6d9..4bce5c624d2ea9 100644 --- a/tensorflow/python/kernel_tests/composite_tensor_ops_test.py +++ b/tensorflow/python/kernel_tests/composite_tensor_ops_test.py @@ -18,11 +18,14 @@ from tensorflow.python.eager import backprop from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util from tensorflow.python.ops import composite_tensor_ops +from tensorflow.python.ops import gen_composite_tensor_ops +from tensorflow.python.ops import gen_list_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops @@ -83,6 +86,30 @@ def testEncodingErrors(self, value, spec, message): with self.assertRaisesRegex(ValueError, message): composite_tensor_ops.composite_tensor_to_variants(value(), spec) + def testDecodingEmptyNonScalarTensorError(self): + if not context.executing_eagerly(): + # Creating a variant tensor of an empty list is not allowed in eager mode. + return + + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'must not be an empty variant tensor'): + gen_composite_tensor_ops.CompositeTensorVariantToComponents( + encoded=constant_op.constant([], dtype=dtypes.variant), + metadata='', + Tcomponents=[dtypes.int32]) + + def testDecodingInvalidEncodedInputError(self): + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'not a valid CompositeTensorVariant tensor'): + self.evaluate( + gen_composite_tensor_ops.CompositeTensorVariantToComponents( + encoded=gen_list_ops.EmptyTensorList( + element_dtype=dtypes.int32, + element_shape=[1, 2], + max_num_elements=2), + metadata='', + Tcomponents=[dtypes.int32])) + def testRoundTripThroughTensorProto(self): value = ragged_factory_ops.constant([[1, 2], [3], [4, 5, 6]]) encoded = composite_tensor_ops.composite_tensor_to_variants(value) diff --git a/tensorflow/python/kernel_tests/control_flow/BUILD b/tensorflow/python/kernel_tests/control_flow/BUILD index e419fccb7fc345..946a3ef56df96b 100644 --- a/tensorflow/python/kernel_tests/control_flow/BUILD +++ b/tensorflow/python/kernel_tests/control_flow/BUILD @@ -181,6 +181,7 @@ cuda_py_test( name = "scan_ops_test", size = "medium", srcs = ["scan_ops_test.py"], + tags = ["no_oss"], # TODO(b/258503209): Disable the test. 
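The overflow exercised by testOutputSizeOutOfBounds above is visible with plain integer arithmetic: SpaceToBatchND multiplies the input batch by the product of block_shape, and with block_shape = [1879048192, 1879048192] that product no longer fits in a signed 64-bit dimension, which is what the "caused by overflow" check rejects. A small illustration of the arithmetic (not TensorFlow code):

block = 1879048192                 # 7 * 2**28, the block size used in the test
out_batch = 10 * block * block     # output batch = input batch * prod(block_shape)
print(out_batch)                   # roughly 3.5e19
print(2**63 - 1)                   # 9223372036854775807, so an int64 shape wraps negative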
deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:errors", diff --git a/tensorflow/python/kernel_tests/data_structures/BUILD b/tensorflow/python/kernel_tests/data_structures/BUILD index 79c46dd4dc18f1..4f9def3b6ab005 100644 --- a/tensorflow/python/kernel_tests/data_structures/BUILD +++ b/tensorflow/python/kernel_tests/data_structures/BUILD @@ -165,8 +165,6 @@ tf_py_test( grpc_enabled = True, tags = [ "no_windows", # TODO(b/192259628) - "noasan", # TODO(b/164696004) - "notsan", # TODO(b/164696004) ], deps = [ "//tensorflow/python:array_ops", diff --git a/tensorflow/python/kernel_tests/data_structures/list_ops_test.py b/tensorflow/python/kernel_tests/data_structures/list_ops_test.py index bc3e2f3c79083b..c95c27eadc0fbd 100644 --- a/tensorflow/python/kernel_tests/data_structures/list_ops_test.py +++ b/tensorflow/python/kernel_tests/data_structures/list_ops_test.py @@ -94,6 +94,16 @@ def testPopFromEmptyTensorListFails(self, max_num_elements): l = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) self.evaluate(l) + def testTensorListReserveWithNonScalarNumElements(self): + # list_kernels.cc in tf/core/kernels raises InvalidArgumentError, and + # tf_ops_n_z.cc in tf/compiler/mlir/tf/ir raises UnknownError. + with self.assertRaises((errors.InvalidArgumentError, errors.UnknownError)): + l = list_ops.tensor_list_reserve( + element_dtype=dtypes.float32, + element_shape=[2, 3], + num_elements=constant_op.constant([1, 1])) + self.evaluate(l) + def testPopUninitializedTensorUseListElementShape(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=[2, 3], num_elements=3) @@ -481,6 +491,30 @@ def testScatterOutputListSizeWithNumElementsSpecified(self): # TensorListScatter should return a list with size num_elements. self.assertAllEqual(list_ops.tensor_list_length(l), 5) + def testScatterFailsWhenElementShapeIsNotVector(self): + c0 = constant_op.constant([1.0, 2.0]) + # In Eager mode, InvalidArgumentError is generated by the Compute function. + # In graph mode, ValueError is generated by the shape function. + with self.assertRaisesRegex( + (errors.InvalidArgumentError, ValueError), + "must be at most rank 1"): + l = gen_list_ops.tensor_list_scatter( + # Wrong element_shape. Should be at most rank 1. + c0, [1, 3], element_shape=[[1]]) + self.evaluate(l) + + def testScatterV2FailsWhenElementShapeIsNotVector(self): + c0 = constant_op.constant([1.0, 2.0]) + # In Eager mode, InvalidArgumentError is generated by the Compute function. + # In graph mode, ValueError is generated by the shape function. + with self.assertRaisesRegex( + (errors.InvalidArgumentError, ValueError), + "must be at most rank 1"): + l = gen_list_ops.tensor_list_scatter_v2( + # Wrong element_shape. Should be at most rank 1. 
+ c0, [1, 3], element_shape=[[1]], num_elements=2) + self.evaluate(l) + def testScatterFailsWhenIndexLargerThanNumElements(self): c0 = constant_op.constant([1.0, 2.0]) with self.assertRaisesRegex( @@ -515,6 +549,17 @@ def testScatterWithNegativeIndicesFails(self): l = list_ops.tensor_list_scatter(c0, [-1, -2], element_shape=[]) self.evaluate(l) + @test_util.run_in_graph_and_eager_modes + def testScatterWithNonScalarFails(self): + c = constant_op.constant(value=[2]) + num_elements = np.array([[], [], []], dtype=np.float32) + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r"Shape must be rank 0 but is rank \d+|" + r"\w+ must be a scalar"): + self.evaluate( + gen_list_ops.TensorListScatterV2( + tensor=c, indices=c, element_shape=c, num_elements=num_elements)) + def testScatterIntoExistingList(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=[], num_elements=3) @@ -560,6 +605,17 @@ def testTensorListFromTensor(self): self.assertAllEqual(e, 1.0) self.assertAllEqual(list_ops.tensor_list_length(l), 0) + def testTensorListFromTensorFailsWhenElementShapeIsNotVector(self): + t = constant_op.constant([1.0, 2.0]) + # In Eager mode, InvalidArgumentError is generated by the Compute function. + # In graph mode, ValueError is generated by the shape function. + with self.assertRaisesRegex( + (errors.InvalidArgumentError, ValueError), + "must be at most rank 1"): + # Wrong element_shape. Should be at most rank 1. + l = list_ops.tensor_list_from_tensor(t, element_shape=[[1]]) + self.evaluate(l) + @test_util.run_gpu_only def testFromTensorGPU(self): with context.device("gpu:0"): @@ -1458,6 +1514,24 @@ def testConcatWithUninitializedTensorsFailsIfNoInputLengths(self): t = list_ops.tensor_list_concat(l, element_dtype=dtypes.float32) self.evaluate(t) + @test_util.run_in_graph_and_eager_modes + def testConcatWithInvalidElementShape(self): + l = list_ops.tensor_list_reserve( + element_dtype=dtypes.float32, element_shape=[], num_elements=0) + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r"element_shape must not be empty"): + self.evaluate(gen_list_ops.tensor_list_concat( + input_handle=l, element_dtype=dtypes.float32, element_shape=[])) + + def testEmptyTensorListInvalidShape(self): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r"Shape must be at most rank 1 but is rank 2"): + t = gen_list_ops.EmptyTensorList( + element_shape=array_ops.ones(dtype=dtypes.int32, shape=[1, 0]), + max_num_elements=constant_op.constant(1), + element_dtype=dtypes.int32) + self.evaluate(t) + def testEvenSplit(self): def RunTest(input_tensor, lengths, expected_stacked_output): @@ -1604,6 +1678,15 @@ def testResizeWithInvalidSizeFails(self): l = list_ops.tensor_list_resize(l, -1) self.evaluate(l) + @test_util.run_in_graph_and_eager_modes + def testResizeWithNonScalarFails(self): + l = list_ops.tensor_list_from_tensor([3, 4, 5], element_shape=[]) + size = np.zeros([0, 2, 3, 3]) + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r"Shape must be rank 0 but is rank \d+|" + r"\w+ must be a scalar"): + self.evaluate(gen_list_ops.TensorListResize(input_handle=l, size=size)) + @test_util.run_deprecated_v1 @test_util.enable_control_flow_v2 def testSkipEagerResizeGrad(self): diff --git a/tensorflow/python/kernel_tests/data_structures/stage_op_test.py b/tensorflow/python/kernel_tests/data_structures/stage_op_test.py index c720155f3b6c90..d12624f1065928 100644 --- 
a/tensorflow/python/kernel_tests/data_structures/stage_op_test.py +++ b/tensorflow/python/kernel_tests/data_structures/stage_op_test.py @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================== from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops @@ -134,6 +135,16 @@ def testPeek(self): for i in range(10): self.assertTrue(sess.run(peek, feed_dict={p: i}) == [i]) + def testPeekBadIndex(self): + stager = data_flow_ops.StagingArea([ + dtypes.int32, + ], shapes=[[10]]) + stager.put([array_ops.zeros([10], dtype=dtypes.int32)]) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + 'must be scalar'): + self.evaluate(stager.peek([])) + @test_util.run_deprecated_v1 def testSizeAndClear(self): with ops.Graph().as_default() as G: diff --git a/tensorflow/python/kernel_tests/image_ops/BUILD b/tensorflow/python/kernel_tests/image_ops/BUILD index 96de1e11c8efe4..4d46cf8b5a1dc7 100644 --- a/tensorflow/python/kernel_tests/image_ops/BUILD +++ b/tensorflow/python/kernel_tests/image_ops/BUILD @@ -102,7 +102,7 @@ tf_py_test( ], ) -tf_py_test( +cuda_py_test( name = "draw_bounding_box_op_test", size = "small", srcs = ["draw_bounding_box_op_test.py"], diff --git a/tensorflow/python/kernel_tests/image_ops/draw_bounding_box_op_test.py b/tensorflow/python/kernel_tests/image_ops/draw_bounding_box_op_test.py index 5b695e861136b8..a66d8d8a9a2a13 100644 --- a/tensorflow/python/kernel_tests/image_ops/draw_bounding_box_op_test.py +++ b/tensorflow/python/kernel_tests/image_ops/draw_bounding_box_op_test.py @@ -16,8 +16,11 @@ import numpy as np +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import image_ops from tensorflow.python.ops import image_ops_impl @@ -50,11 +53,16 @@ def _fillBorder(self, image, color): image[height - 1, 0:width, 0:depth] = color return image - def _testDrawBoundingBoxColorCycling(self, img, colors=None): + def _testDrawBoundingBoxColorCycling(self, + img, + dtype=dtypes.float32, + colors=None): """Tests if cycling works appropriately. Args: img: 3-D numpy image on which to draw. + dtype: image dtype (float, half). + colors: color table. 
""" color_table = colors if colors is None: @@ -82,7 +90,7 @@ def _testDrawBoundingBoxColorCycling(self, img, colors=None): bboxes = math_ops.cast(bboxes, dtypes.float32) bboxes = array_ops.expand_dims(bboxes, 0) image = ops.convert_to_tensor(image) - image = image_ops_impl.convert_image_dtype(image, dtypes.float32) + image = image_ops_impl.convert_image_dtype(image, dtype) image = array_ops.expand_dims(image, 0) image = image_ops.draw_bounding_boxes(image, bboxes, colors=colors) with self.cached_session(use_gpu=False) as sess: @@ -118,6 +126,30 @@ def testDrawBoundingBoxRGBAColorCyclingWithColors(self): [0, 0, 0.5, 1]]) self._testDrawBoundingBoxColorCycling(image, colors=colors) + def testDrawBoundingBoxHalf(self): + """Test if RGBA color cycling works correctly with provided colors.""" + image = np.zeros([10, 10, 4], "float32") + colors = np.asarray([[0.5, 0, 0.5, 1], [0.5, 0.5, 0, 1], [0.5, 0, 0, 1], + [0, 0, 0.5, 1]]) + self._testDrawBoundingBoxColorCycling( + image, dtype=dtypes.half, colors=colors) + + # generate_bound_box_proposals is only available on GPU. + @test_util.run_gpu_only() + def testGenerateBoundingBoxProposals(self): + # Op only exists on GPU. + with self.cached_session(use_gpu=True): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 4"): + scores = constant_op.constant( + value=[[[[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]]]) + self.evaluate( + image_ops.generate_bounding_box_proposals( + scores=scores, + bbox_deltas=[], + image_info=[], + anchors=[], + pre_nms_topn=1)) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/image_ops/extract_image_patches_op_test.py b/tensorflow/python/kernel_tests/image_ops/extract_image_patches_op_test.py index 9d9b7bf7248d41..3247fbb428adba 100644 --- a/tensorflow/python/kernel_tests/image_ops/extract_image_patches_op_test.py +++ b/tensorflow/python/kernel_tests/image_ops/extract_image_patches_op_test.py @@ -17,7 +17,9 @@ import numpy as np from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -139,6 +141,17 @@ def testComplexDataTypes(self): padding=padding, patches=patches) + def testInvalidAttributes(self): + """Test for passing weird things into ksizes.""" + with self.assertRaisesRegex(TypeError, "Expected list"): + image = constant_op.constant([0.0]) + ksizes = math_ops.cast( + constant_op.constant(dtype=dtypes.int16, value=[[1, 4], [5, 2]]), + dtype=dtypes.qint16) + strides = [1, 1, 1, 1] + self.evaluate( + array_ops.extract_image_patches( + image, ksizes=ksizes, strides=strides, padding="SAME")) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/io_ops/checkpoint_ops_test.py b/tensorflow/python/kernel_tests/io_ops/checkpoint_ops_test.py index f357de2de7c845..91618f974b31a2 100644 --- a/tensorflow/python/kernel_tests/io_ops/checkpoint_ops_test.py +++ b/tensorflow/python/kernel_tests/io_ops/checkpoint_ops_test.py @@ -227,6 +227,32 @@ def test_load_and_remap_all_missing_rows_and_cols(self): np.reshape(initializing_values, (num_rows, num_cols)), self.evaluate(remapped_matrix)) + def test_load_and_remap_invalid_dims(self): + ckpt_path = constant_op.constant( + '/tmp/warm_starting_util_test5kl2a3pc/tmpph76tep2/model-0', + shape=[], + dtype=dtypes.string) + old_tensor_name = constant_op.constant( + 
'/tmp/warm_starting_util_test5kl2a3pc/tmpph76tep2/model-0', + shape=[], + dtype=dtypes.string) + row_remapping = constant_op.constant(0, shape=[], dtype=dtypes.int64) + col_remapping = constant_op.constant(3, shape=[3], dtype=dtypes.int64) + initializing_values = constant_op.constant([], + shape=[0, 1], + dtype=dtypes.float32) + with self.cached_session(), self.assertRaisesRegex( + (ValueError, errors.InvalidArgumentError), 'tensor must be 1-D'): + self.evaluate( + gen_checkpoint_ops.load_and_remap_matrix( + ckpt_path=ckpt_path, + old_tensor_name=old_tensor_name, + row_remapping=row_remapping, + col_remapping=col_remapping, + initializing_values=initializing_values, + num_rows=1, + num_cols=1)) + @test_util.run_deprecated_v1 def test_load_and_remap_invalid_remapping(self): """Tests that errors are raised when an ID maps to multiple new IDs. diff --git a/tensorflow/python/kernel_tests/linalg/eig_op_test.py b/tensorflow/python/kernel_tests/linalg/eig_op_test.py index 37ba5b0cc89592..33ce04f50d3baa 100644 --- a/tensorflow/python/kernel_tests/linalg/eig_op_test.py +++ b/tensorflow/python/kernel_tests/linalg/eig_op_test.py @@ -18,8 +18,10 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_lib +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_linalg_ops from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops @@ -88,6 +90,16 @@ def testMatrixThatFailsWhenFlushingDenormsToZero(self): self.assertAllClose(matrix, np.matmul(np.matmul(v, np.diag(e)), v.transpose())) + def testMismatchedDtypes(self): + tensor = constant_op.constant([[0, 1], [2, 3]], dtype=dtypes_lib.float32) + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Invalid output dtype"): + self.evaluate( + gen_linalg_ops.eig( + input=tensor, + Tout=dtypes_lib.complex128, # Expected dtype: complex64. 
+ compute_v=True)) + def SortEigenValues(e): perm = np.argsort(e.real + e.imag, -1) diff --git a/tensorflow/python/kernel_tests/linalg/sparse/csr_sparse_matrix_ops_test.py b/tensorflow/python/kernel_tests/linalg/sparse/csr_sparse_matrix_ops_test.py index 0e72a00ef1c5ec..d129bea768e85f 100644 --- a/tensorflow/python/kernel_tests/linalg/sparse/csr_sparse_matrix_ops_test.py +++ b/tensorflow/python/kernel_tests/linalg/sparse/csr_sparse_matrix_ops_test.py @@ -168,6 +168,25 @@ def testSparseTensorConversion(self): self.assertAllClose(a_values, a_st_rt_value.values) self.assertAllEqual(a_dense_shape, a_st_rt_value.dense_shape) + def testSparseTensorConversionInvalidInputShapes(self): + values = constant_op.constant( + 0.554979503, shape=[5], dtype=dtypes.float32) + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 1"): + indices = constant_op.constant(0, shape=[5, 2], dtype=dtypes.int64) + dense_shape = constant_op.constant(53, shape=[], dtype=dtypes.int64) + csr = sparse_csr_matrix_ops.sparse_tensor_to_csr_sparse_matrix( + indices=indices, values=values, dense_shape=dense_shape) + self.evaluate(csr) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 2"): + indices = constant_op.constant(0, shape=[5], dtype=dtypes.int64) + dense_shape = constant_op.constant(53, shape=[1], dtype=dtypes.int64) + csr = sparse_csr_matrix_ops.sparse_tensor_to_csr_sparse_matrix( + indices=indices, values=values, dense_shape=dense_shape) + self.evaluate(csr) + # TODO(b/139491352): Add handle_data propagation to array_ops.identity. @test_util.run_deprecated_v1 def testCSRSparseMatrixResourceVariable(self): @@ -1294,6 +1313,16 @@ def testOrderingAMD(self): self.assertLess(cholesky_with_amd_nnz_value, cholesky_without_ordering_nnz_value) + @test_util.run_in_graph_and_eager_modes + def testNoMatrixNoCrash(self): + # Round-about way of creating an empty variant tensor that works in both + # graph and eager modes. 
+ no_matrix = array_ops.reshape(dense_to_csr_sparse_matrix([[0.0]]), [1])[0:0] + with self.assertRaisesRegex( + (ValueError, errors.InvalidArgumentError), + "(Invalid input matrix)|(Shape must be rank 0)"): + sparse_csr_matrix_ops.sparse_matrix_nnz(no_matrix) + class CSRSparseMatrixOpsBenchmark(test.Benchmark): diff --git a/tensorflow/python/kernel_tests/linalg/svd_op_test.py b/tensorflow/python/kernel_tests/linalg/svd_op_test.py index 8bfad4b55bdd60..bba3a5f629cb59 100644 --- a/tensorflow/python/kernel_tests/linalg/svd_op_test.py +++ b/tensorflow/python/kernel_tests/linalg/svd_op_test.py @@ -108,6 +108,14 @@ def testExecuteMultipleWithoutError(self): for i in range(0, len(val), 2): self.assertAllEqual(val[i], val[i + 1]) + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testEmptyBatches(self): + matrices = constant_op.constant(1.0, shape=[0, 2, 2]) + s, u, v = self.evaluate(linalg_ops.svd(matrices)) + self.assertAllEqual(s, np.zeros([0, 2])) + self.assertAllEqual(u, np.zeros([0, 2, 2])) + self.assertAllEqual(v, np.zeros([0, 2, 2])) + def _GetSvdOpTest(dtype_, shape_, use_static_shape_, compute_uv_, full_matrices_): diff --git a/tensorflow/python/kernel_tests/math_ops/bincount_op_test.py b/tensorflow/python/kernel_tests/math_ops/bincount_op_test.py index 9161b3b082270c..ad0c0469717f5f 100644 --- a/tensorflow/python/kernel_tests/math_ops/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/math_ops/bincount_op_test.py @@ -24,6 +24,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import bincount_ops from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops.ragged import ragged_factory_ops from tensorflow.python.ops.ragged import ragged_tensor @@ -150,6 +151,31 @@ def test_shape_function(self): v2 = gen_math_ops.bincount([1, 2, 3, 1, 6, 8], s, []) self.assertAllEqual(v2.get_shape().as_list(), [None]) + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + binary_output = True + inp = random_ops.random_uniform( + shape=[10, 10], + minval=-10000, + maxval=10000, + dtype=dtypes.int32, + seed=-2460) + size = random_ops.random_uniform( + shape=[], minval=-10000, maxval=10000, dtype=dtypes.int32, seed=-10000) + weights = random_ops.random_uniform( + shape=[], + minval=-10000, + maxval=10000, + dtype=dtypes.float32, + seed=-10000) + with self.assertRaises(errors.InvalidArgumentError): + self.evaluate( + gen_math_ops.dense_bincount( + input=inp, + size=size, + weights=weights, + binary_output=binary_output)) + class BincountOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): @@ -366,7 +392,7 @@ def test_sparse_bincount_all_count(self, dtype): num_rows = 128 size = 1000 n_elems = 4096 - inp_indices = np.random.randint(0, num_rows, (n_elems,)) + inp_indices = np.random.randint(0, num_rows, (n_elems, 1)) inp_vals = np.random.randint(0, size, (n_elems,), dtype=dtype) np_out = np.bincount(inp_vals, minlength=size) @@ -390,7 +416,7 @@ def test_sparse_bincount_all_count_with_weights(self, dtype): num_rows = 128 size = 1000 n_elems = 4096 - inp_indices = np.random.randint(0, num_rows, (n_elems,)) + inp_indices = np.random.randint(0, num_rows, (n_elems, 1)) inp_vals = np.random.randint(0, size, (n_elems,), dtype=dtype) inp_weight = np.random.random((n_elems,)) @@ -415,7 +441,7 @@ def test_sparse_bincount_all_binary(self, dtype): num_rows = 128 size = 10 n_elems = 4096 - inp_indices = np.random.randint(0, num_rows, (n_elems,)) + 
inp_indices = np.random.randint(0, num_rows, (n_elems, 1)) inp_vals = np.random.randint(0, size, (n_elems,), dtype=dtype) np_out = np.ones((size,)) @@ -440,7 +466,7 @@ def test_sparse_bincount_all_binary_weights(self, dtype): num_rows = 128 size = 10 n_elems = 4096 - inp_indices = np.random.randint(0, num_rows, (n_elems,)) + inp_indices = np.random.randint(0, num_rows, (n_elems, 1)) inp_vals = np.random.randint(0, size, (n_elems,), dtype=dtype) inp_weight = np.random.random((n_elems,)) @@ -532,6 +558,27 @@ def test_size_is_not_scalar(self): # b/206619828 weights=[0, 0], binary_output=False)) + def test_sparse_bincount_input_validation(self): + np.random.seed(42) + num_rows = 128 + size = 1000 + n_elems = 4096 + inp_indices = np.random.randint(0, num_rows, (n_elems, 1)) + inp_vals = np.random.randint(0, size, (n_elems,)) + + # Insert negative index. + inp_indices[10, 0] = -2 + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "out of bounds"): + self.evaluate( + gen_math_ops.sparse_bincount( + indices=inp_indices, + values=inp_vals, + dense_shape=[num_rows], + size=size, + weights=[])) + class RaggedBincountOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): @@ -684,6 +731,18 @@ def test_size_is_not_scalar(self): # b/206619828 binary_output=False, name=None)) + @test_util.run_in_graph_and_eager_modes + def test_splits_empty(self): # b/238450914 + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Splits must be non-empty"): + self.evaluate( + gen_math_ops.ragged_bincount( + splits=[], # Invalid splits + values=[1], + size=1, + weights=[1], + binary_output=False, + name=None)) if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/kernel_tests/math_ops/sets_test.py b/tensorflow/python/kernel_tests/math_ops/sets_test.py index 61a11606e661ed..a35214173d59b8 100644 --- a/tensorflow/python/kernel_tests/math_ops/sets_test.py +++ b/tensorflow/python/kernel_tests/math_ops/sets_test.py @@ -23,6 +23,7 @@ from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_set_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import sets from tensorflow.python.ops import sparse_ops @@ -1303,6 +1304,18 @@ def test_set_union_output_is_sorted(self, dtype): result.values, _constant([1, 3, 5, 7, 9, 0, 2, 4, 5, 6, 6, 8, 9], dtype)) + def test_raw_ops_setsize_invalid_shape(self): + with self.assertRaisesRegex(errors_impl.InvalidArgumentError, + "Shape must be a 1D tensor"): + invalid_shape = 1 + self.evaluate( + gen_set_ops.set_size( + set_indices=1, + set_values=[1, 1], + set_shape=invalid_shape, + validate_indices=True, + name="")) + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/kernel_tests/nn_ops/BUILD b/tensorflow/python/kernel_tests/nn_ops/BUILD index 3d2a6ce7288641..9b9a08ef84c096 100644 --- a/tensorflow/python/kernel_tests/nn_ops/BUILD +++ b/tensorflow/python/kernel_tests/nn_ops/BUILD @@ -149,6 +149,7 @@ cuda_py_test( shard_count = 2, tags = [ "optonly", # flaky timeouts unless optimized + "no_oss", # TODO(b/258503209): Disable the test. 
], deps = [ "//tensorflow/python:array_ops", @@ -483,6 +484,7 @@ cuda_py_test( srcs = ["pooling_ops_3d_test.py"], deps = [ "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:nn_grad", "//tensorflow/python:nn_ops", diff --git a/tensorflow/python/kernel_tests/nn_ops/conv3d_backprop_filter_v2_grad_test.py b/tensorflow/python/kernel_tests/nn_ops/conv3d_backprop_filter_v2_grad_test.py index f6e84d3c339f3e..c92bb747acfa64 100644 --- a/tensorflow/python/kernel_tests/nn_ops/conv3d_backprop_filter_v2_grad_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/conv3d_backprop_filter_v2_grad_test.py @@ -18,6 +18,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker @@ -58,6 +59,23 @@ def testGradient(self): err_tolerance = 1e-3 self.assertLess(err, err_tolerance) + def testBadFilterShape(self): + strides = [1, 1, 1, 1, 1] + padding = "VALID" + tin = constant_op.constant( + .5053710941, shape=[2, 2, 2, 2, 1], dtype=dtypes.float32) + filter_sizes = constant_op.constant(0, shape=[], dtype=dtypes.int32) + out_backprop = constant_op.constant( + .5053710941, shape=[2, 2, 2, 2, 1], dtype=dtypes.float32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 1"): + nn_ops.conv3d_backprop_filter_v2( + input=tin, + filter_sizes=filter_sizes, + out_backprop=out_backprop, + strides=strides, + padding=padding) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/nn_ops/conv_ops_test.py b/tensorflow/python/kernel_tests/nn_ops/conv_ops_test.py index 91a35f9002b1f1..265d1462793492 100644 --- a/tensorflow/python/kernel_tests/nn_ops/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/conv_ops_test.py @@ -32,6 +32,7 @@ from tensorflow.python.layers import convolutional from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops @@ -759,6 +760,15 @@ def testConv2DExplicitPaddingWithDilations(self): padding=[[2, 1], [1, 2]], dilations=[2, 3]) + @test_util.run_in_graph_and_eager_modes() + def testConv2dOnlyPaddingReturnsZeros(self): + self._VerifyValues( + tensor_in_sizes=[1, 0, 2, 1], + filter_in_sizes=[1, 1, 1, 1], + strides=[1, 1], + padding=[[1, 1], [1, 1]], + expected=[0, 0, 0, 0, 0, 0, 0, 0]) + def testConv2DExplicitPaddingWithLayoutOptimizer(self): # Test with Grappler's layout optimizer, to ensure the layout optimizer # handles explicit padding correctly. 
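The shape arithmetic behind testConv2dOnlyPaddingReturnsZeros above: a zero-sized spatial dimension still yields a well-defined, all-zero output once the explicit padding is applied. A short sketch of why expected contains exactly eight zeros (illustration only, using the standard padded-convolution size formula):

# tensor_in_sizes=[1, 0, 2, 1], 1x1 filter, strides [1, 1], padding [[1, 1], [1, 1]]
in_h, in_w = 0, 2
pad_h, pad_w = 1 + 1, 1 + 1
k, stride = 1, 1
out_h = (in_h + pad_h - k) // stride + 1   # = 2
out_w = (in_w + pad_w - k) // stride + 1   # = 4
assert out_h * out_w == 8                  # matches expected=[0, 0, 0, 0, 0, 0, 0, 0]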
@@ -1103,6 +1113,23 @@ def testConv2DInputSizesContainsOnlySpatialDimensionsBackpropInput(self): use_gpu=use_gpu, err=1e-5) + @test_util.run_in_graph_and_eager_modes + @test_util.disable_xla("b/239598470") + def testConv2DBackpropInputDegenerateBackpropInput(self): + input_sizes = [3, 1, 1, 2] + expected_output = np.zeros(input_sizes).flatten() + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackpropInput( + input_sizes=input_sizes, + filter_sizes=[1, 3, 2, 3], + output_sizes=[3, 1, 0, 3], + strides=[1, 2], + padding="VALID", + expected=expected_output, + data_format=data_format, + use_gpu=use_gpu, + err=1e-5) + # Testing for backprops def _RunAndVerifyBackpropFilter(self, input_sizes, @@ -1293,7 +1320,7 @@ def _RunAndVerifyBackpropInputDilation(self, input_sizes, filter_sizes, x2 = self._CreateNumpyTensor(filter_sizes) default_dilations = (dilations[0] == 1 and dilations[1] == 1) if default_dilations or use_gpu: - with self.cached_session(use_gpu=use_gpu) as sess: + with self.cached_session(use_gpu=use_gpu): if data_format == "NCHW": input_sizes = test_util.NHWCToNCHW(input_sizes) t1 = constant_op.constant(x1, shape=input_sizes) @@ -1339,7 +1366,7 @@ def _RunAndVerifyBackpropFilterDilation(self, input_sizes, filter_sizes, x2 = self._CreateNumpyTensor(filter_sizes) default_dilations = (dilations[0] == 1 and dilations[1] == 1) if default_dilations or use_gpu: - with self.cached_session(use_gpu=use_gpu) as sess: + with self.cached_session(use_gpu=use_gpu): if data_format == "NCHW": input_sizes = test_util.NHWCToNCHW(input_sizes) t1 = constant_op.constant(x1, shape=input_sizes) @@ -2602,6 +2629,27 @@ def testOpEdgeCases(self): strides=[1, 1, 1, 1], padding=[[0, 0], [-1, 0], [0, 0], [0, 0]])) + def testConv2DBackpropInputInvalidOutBackpropRaiseError(self): + with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): + with self.cached_session(): + input_sizes = constant_op.constant([65534, 65534], + shape=[2], + dtype=dtypes.int32) + filters = constant_op.constant( + 0.159749106, shape=[3, 3, 2, 2], dtype=dtypes.float32) + out_backprop = constant_op.constant(0, shape=[], dtype=dtypes.float32) + t = gen_nn_ops.conv2d_backprop_input( + input_sizes=input_sizes, + filter=filters, + out_backprop=out_backprop, + strides=[1, 1, 1, 1], + padding="SAME", + use_cudnn_on_gpu=True, + explicit_paddings=[], + data_format="NHWC", + dilations=[1, 1, 1, 1]) + self.evaluate(t) + @test_util.run_all_without_tensor_float_32("Avoid TF32 conv on GPU") class DepthwiseConv2DTest(test.TestCase): @@ -2629,7 +2677,7 @@ def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, # numbers from 1. 
x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] - with self.cached_session() as sess: + with self.cached_session(): t1 = constant_op.constant(x1, shape=tensor_in_sizes) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes) @@ -2900,7 +2948,7 @@ def _CompareFwdConv2D(self, tensor_in_sizes, filter_in_sizes, conv_strides, x1 = np.random.rand(*tensor_in_sizes).astype(np.float32) x2 = np.random.rand(*filter_in_sizes).astype(np.float32) - with self.cached_session(use_gpu=False) as sess: + with self.cached_session(use_gpu=False): t1 = constant_op.constant(x1, shape=tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes) strides = [1] + conv_strides + [1] @@ -3383,6 +3431,33 @@ def testAddWithSameSrcAndAddTensorBuffer(self): np.rint(expected_output), self.evaluate(add).reshape(-1)) + # Fused resize and pad conv. + @test_util.run_in_graph_and_eager_modes() + def testResizeAndPadLargeResize(self): + with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError), + "Encountered overflow"): + mode = "REFLECT" + strides = [1, 1, 1, 1] + padding = "SAME" + resize_align_corners = False + tensor = constant_op.constant( + 147, shape=[3, 3, 1, 4], dtype=dtypes.float32) + size = constant_op.constant([1879048192, 1879048192], dtype=dtypes.int32) + paddings = constant_op.constant([[0, 0], [0, 0], [0, 0], [0, 0]], + dtype=dtypes.int32) + kernel = constant_op.constant( + 123, shape=[1, 3, 4, 1], dtype=dtypes.float32) + self.evaluate( + gen_nn_ops.fused_resize_and_pad_conv2d( + input=tensor, + size=size, + paddings=paddings, + filter=kernel, + mode=mode, + strides=strides, + padding=padding, + resize_align_corners=resize_align_corners)) + if __name__ == "__main__": for index, (input_size_, filter_size_, output_size_, stride_, diff --git a/tensorflow/python/kernel_tests/nn_ops/fractional_avg_pool_op_test.py b/tensorflow/python/kernel_tests/nn_ops/fractional_avg_pool_op_test.py index 7b153ae1ed7084..59b20de84b4c4e 100644 --- a/tensorflow/python/kernel_tests/nn_ops/fractional_avg_pool_op_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/fractional_avg_pool_op_test.py @@ -333,6 +333,41 @@ def testNegativeSeqValuesForGradOp(self): self.evaluate(z) + def testPoolingRatioHasMoreDimThanInput(self): + with self.cached_session() as _: + with self.assertRaisesRegex( + errors.InvalidArgumentError, + r"Pooling ratio is higher than input dimension size for dimension 1.*" + ): + result = nn_ops.gen_nn_ops.fractional_avg_pool( + value=constant_op.constant( + value=[[[[1, 4, 2, 3]]]], dtype=dtypes.int64), + pooling_ratio=[1.0, 1.44, 1.73, 1.0], + pseudo_random=False, + overlapping=False, + deterministic=False, + seed=0, + seed2=0, + name=None) + self.evaluate(result) + + def testPoolingRatioValueOutOfRange(self): + with self.cached_session() as _: + # Whether turn on `TF2_BEHAVIOR` generates different error messages + with self.assertRaisesRegex( + (errors.InvalidArgumentError, ValueError), + r"(pooling_ratio cannot be smaller than 1, got: .*)|(is negative)"): + result = nn_ops.gen_nn_ops.fractional_avg_pool( + value=np.zeros([3, 30, 30, 3]), + pooling_ratio=[1, -1, 3, 1], + pseudo_random=False, + overlapping=False, + deterministic=False, + seed=0, + seed2=0, + ) + self.evaluate(result) + class FractionalAvgPoolGradTest(test.TestCase): """Tests for FractionalAvgPoolGrad. 
@@ -541,6 +576,27 @@ def testLargePoolingRatioThroughGradientError(self): delta=1e-2) self.assertLess(gradient_error, error_margin) + def testInvalidSeqRaiseErrorForFractionalAvgPoolGrad(self): + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + with self.cached_session() as _: + overlapping = True + orig_input_tensor_shape = constant_op.constant( + -1879048192, shape=[4], dtype=dtypes.int64) + out_backprop = constant_op.constant([], + shape=[0, 0, 0, 0], + dtype=dtypes.float64) + row_pooling_sequence = constant_op.constant( + 1, shape=[4], dtype=dtypes.int64) + col_pooling_sequence = constant_op.constant( + 1, shape=[4], dtype=dtypes.int64) + t = gen_nn_ops.fractional_avg_pool_grad( + orig_input_tensor_shape=orig_input_tensor_shape, + out_backprop=out_backprop, + row_pooling_sequence=row_pooling_sequence, + col_pooling_sequence=col_pooling_sequence, + overlapping=overlapping) + self.evaluate(t) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/nn_ops/fractional_max_pool_op_test.py b/tensorflow/python/kernel_tests/nn_ops/fractional_max_pool_op_test.py index 5acacdbb7463b2..9102973fa13b50 100644 --- a/tensorflow/python/kernel_tests/nn_ops/fractional_max_pool_op_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/fractional_max_pool_op_test.py @@ -124,7 +124,7 @@ def _ValidateFractionalMaxPoolResult(self, input_tensor, pooling_ratio, Returns: None """ - with self.cached_session() as sess: + with self.cached_session(): p, r, c = nn_ops.fractional_max_pool_v2( input_tensor, pooling_ratio, @@ -155,7 +155,7 @@ def _testVisually(self): overlapping)) rand_mat = self._PRNG.randint(10, size=tensor_shape) pooling_ratio = [1, math.sqrt(2), math.sqrt(2), 1] - with self.cached_session() as sess: + with self.cached_session(): p, r, c = nn_ops.fractional_max_pool_v2( rand_mat, pooling_ratio, @@ -320,7 +320,7 @@ def testDeterminismExceptionThrowing(self): nn_ops.fractional_max_pool( rand_mat, [1, 1.5, 1.5, 1], seed=1, seed2=1, deterministic=True) - def testPoolingRatio(self): + def testPoolingRatioHasMoreDimThanInput(self): with self.cached_session() as _: with self.assertRaisesRegex( errors.InvalidArgumentError, @@ -338,6 +338,23 @@ def testPoolingRatio(self): name=None) self.evaluate(result) + def testPoolingRatioValueOutOfRange(self): + with self.cached_session() as _: + # Whether turn on `TF2_BEHAVIOR` generates different error messages + with self.assertRaisesRegex( + (errors.InvalidArgumentError, ValueError), + r"(pooling_ratio cannot be smaller than 1, got: .*)|(is negative)"): + result = nn_ops.gen_nn_ops.fractional_max_pool( + value=np.zeros([3, 30, 30, 3]), + pooling_ratio=[1, -1, 3, 1], + pseudo_random=False, + overlapping=False, + deterministic=False, + seed=0, + seed2=0, + ) + self.evaluate(result) + class FractionalMaxPoolGradTest(test.TestCase): """Tests for FractionalMaxPoolGrad. 
@@ -630,6 +647,47 @@ def testWhenRepeatedMaxValueInPoolingRegion(self): self.assertAllClose(expected_input_backprop_overlapping, input_backprop_overlapping) + def testInvalidSeqRaiseErrorForFractionalMaxPoolGrad(self): + with self.assertRaises(errors.InvalidArgumentError): + with self.cached_session(): + overlapping = True + orig_input = constant_op.constant( + .453409232, shape=[1, 7, 13, 1], dtype=dtypes.float32) + orig_output = constant_op.constant( + .453409232, shape=[1, 7, 13, 1], dtype=dtypes.float32) + out_backprop = constant_op.constant( + .453409232, shape=[1, 7, 13, 1], dtype=dtypes.float32) + row_pooling_sequence = constant_op.constant( + 0, shape=[5], dtype=dtypes.int64) + col_pooling_sequence = constant_op.constant( + 0, shape=[5], dtype=dtypes.int64) + t = gen_nn_ops.FractionalMaxPoolGrad( + orig_input=orig_input, + orig_output=orig_output, + out_backprop=out_backprop, + row_pooling_sequence=row_pooling_sequence, + col_pooling_sequence=col_pooling_sequence, + overlapping=overlapping) + self.evaluate(t) + + def testOverLargeSeqRaiseErrorForFractionalMaxPoolGrad(self): + with self.assertRaises(errors.InvalidArgumentError): + with self.cached_session(): + overlapping = False + orig_input = [[[[1, 1, 1, 1, 1]]]] + orig_output = [[[[1, 1, 1]]]] + out_backprop = [[[[3], [3], [6]]]] + row_pooling_sequence = [-0x4000000, 1, 1] + col_pooling_sequence = [-0x4000000, 1, 1] + t = gen_nn_ops.FractionalMaxPoolGrad( + orig_input=orig_input, + orig_output=orig_output, + out_backprop=out_backprop, + row_pooling_sequence=row_pooling_sequence, + col_pooling_sequence=col_pooling_sequence, + overlapping=overlapping) + self.evaluate(t) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/nn_ops/lrn_op_test.py b/tensorflow/python/kernel_tests/nn_ops/lrn_op_test.py index 9fb7724f695375..f44c7316845b21 100644 --- a/tensorflow/python/kernel_tests/nn_ops/lrn_op_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/lrn_op_test.py @@ -20,11 +20,13 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import nn +from tensorflow.python.ops import random_ops import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test @@ -111,6 +113,41 @@ def testGradientsZeroInput(self): self.assertAllClose(r, expected) self.assertShapeEqual(expected, grad) + @test_util.run_in_graph_and_eager_modes + def testIncompatibleInputAndOutputImageShapes(self): + depth_radius = 1 + bias = 1.59018219 + alpha = 0.117728651 + beta = 0.404427052 + input_grads = random_ops.random_uniform( + shape=[4, 4, 4, 4], + minval=-10000, + maxval=10000, + dtype=dtypes.float32, + seed=-2033) + input_image = random_ops.random_uniform( + shape=[4, 4, 4, 4], + minval=-10000, + maxval=10000, + dtype=dtypes.float32, + seed=-2033) + invalid_output_image = random_ops.random_uniform( + shape=[4, 4, 4, 4, 4, 4], + minval=-10000, + maxval=10000, + dtype=dtypes.float32, + seed=-2033) + with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): + self.evaluate( + nn.lrn_grad( + input_grads=input_grads, + input_image=input_image, + output_image=invalid_output_image, + depth_radius=depth_radius, + bias=bias, + alpha=alpha, + beta=beta)) + def 
_RunAndVerifyGradients(self, dtype): with self.cached_session(): # random shape diff --git a/tensorflow/python/kernel_tests/nn_ops/pooling_ops_3d_test.py b/tensorflow/python/kernel_tests/nn_ops/pooling_ops_3d_test.py index 71bf5d02bcb568..9c8f11eb743096 100644 --- a/tensorflow/python/kernel_tests/nn_ops/pooling_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/pooling_ops_3d_test.py @@ -18,6 +18,7 @@ from tensorflow.python.eager import context from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util @@ -67,7 +68,7 @@ def _VerifyOneTest(self, pool_func, input_sizes, window, strides, padding, # Initializes the input tensor with array containing incrementing # numbers from 1. x = [f * 1.0 for f in range(1, total_size + 1)] - with self.cached_session(use_gpu=use_gpu) as sess: + with self.cached_session(use_gpu=use_gpu): t = constant_op.constant(x, shape=input_sizes) window = [1] + list(window) + [1] strides = [1] + list(strides) + [1] @@ -124,6 +125,23 @@ def testAvgPool3dSamePaddingDifferentStrides(self): padding="SAME", expected=expected_output) + def testMaxPool3dGrad(self): + with self.assertRaises( + (errors.ResourceExhaustedError, errors.InvalidArgumentError)): + with self.cached_session(): + orig_input_shape = constant_op.constant( + 1879048192, shape=[5], dtype=dtypes.int32) + grad = constant_op.constant( + 1, shape=[1, 3, 2, 4, 2], dtype=dtypes.float32) + t = gen_nn_ops.AvgPool3DGrad( + orig_input_shape=orig_input_shape, + grad=grad, + ksize=[1, 1, 1, 1, 1], + strides=[1, 1, 1, 1, 1], + padding="SAME", + data_format="NDHWC") + self.evaluate(t) + def testMaxPool3dValidPadding(self): expected_output = [40.0, 41.0, 42.0] self._VerifyValues( diff --git a/tensorflow/python/kernel_tests/nn_ops/pooling_ops_test.py b/tensorflow/python/kernel_tests/nn_ops/pooling_ops_test.py index aca29d05a3686d..e279f6e0027da2 100644 --- a/tensorflow/python/kernel_tests/nn_ops/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/pooling_ops_test.py @@ -537,6 +537,18 @@ def testAvgPoolEmptyInput(self, **kwargs): expected=[], **kwargs) + @test_util.run_in_graph_and_eager_modes + def testRawAvgPoolLargeKsizeRaiseError(self): + with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): + with self.cached_session(): + t = gen_nn_ops.avg_pool( + value=np.ones([1, 1, 1, 1]), + ksize=[1, 1e20, 1, 1], + strides=[1, 1, 1, 1], + padding="SAME", + data_format="NHWC") + self.evaluate(t) + @parameterized.parameters( GetTestConfigsDicts(nn_ops.max_pool, gen_nn_ops.max_pool_v2)) @test_util.run_deprecated_v1 @@ -760,6 +772,18 @@ def testMaxPoolEmptyInput(self, **kwargs): expected=[], **kwargs) + @parameterized.parameters( + GetTestConfigsDicts(nn_ops.max_pool, gen_nn_ops.max_pool_v2)) + @test_util.run_deprecated_v1 + def testMaxPoolInvalidFilterSize(self, **kwargs): + with self.cached_session(use_gpu=test.is_gpu_available()): + t = constant_op.constant(1.0, shape=[1, 1, 1, 1]) + with self.assertRaisesRegex( + (errors_impl.InvalidArgumentError, ValueError), + "Negative dimension size"): + t = self.evaluate( + nn_ops.max_pool(t, ksize=[1, 1, 2, 1], strides=1, padding="VALID")) + # Tests for DepthwiseMaxPooling on CPU only. 
@parameterized.parameters( GetTestConfigsDicts( @@ -2470,6 +2494,22 @@ def testMaxPoolGradWithArgmaxEagerShapeErrors(self): inp, grad, argmax, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], padding="VALID") + def testAvgPoolGradInvalidInputShapeRaiseError(self): + with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): + with self.cached_session(): + orig_input_shape = constant_op.constant( + -536870912, shape=[4], dtype=dtypes.int32) + grad = constant_op.constant( + .0890338004362538, shape=[1, 5, 7, 1], dtype=dtypes.float64) + t = gen_nn_ops.AvgPoolGrad( + orig_input_shape=orig_input_shape, + grad=grad, + ksize=[1, 2, 2, 1], + strides=[1, 2, 2, 1], + padding="VALID", + data_format="NHWC") + self.evaluate(t) + def GetMaxPoolFwdTest(input_size, filter_size, strides, padding): diff --git a/tensorflow/python/kernel_tests/nn_ops/rnn_cell_test.py b/tensorflow/python/kernel_tests/nn_ops/rnn_cell_test.py index 3d2eb53ecfd623..cf1c61d0dd25c6 100644 --- a/tensorflow/python/kernel_tests/nn_ops/rnn_cell_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/rnn_cell_test.py @@ -33,6 +33,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_rnn_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops @@ -1324,6 +1325,88 @@ def testDynamicEquivalentToStaticRNN(self): def testDynamicEquivalentToStaticRNNWithSequenceLength(self): self._testDynamicEquivalentToStaticRNN(use_sequence_length=True) + @test_util.run_in_graph_and_eager_modes + def testLSTMBlockCellErrorHandling(self): + forget_bias = 1 + cell_clip = 0 + use_peephole = False + x = constant_op.constant(0.837607, shape=[28, 29], dtype=dtypes.float32) + cs_prev = constant_op.constant(0, shape=[28, 17], dtype=dtypes.float32) + h_prev = constant_op.constant( + 0.592631638, shape=[28, 17], dtype=dtypes.float32) + w = constant_op.constant(0.887386262, shape=[46, 68], dtype=dtypes.float32) + wci = constant_op.constant(0, shape=[], dtype=dtypes.float32) + wcf = constant_op.constant(0, shape=[17], dtype=dtypes.float32) + wco = constant_op.constant( + 0.592631638, shape=[28, 17], dtype=dtypes.float32) + b = constant_op.constant(0.75259006, shape=[68], dtype=dtypes.float32) + with self.assertRaises(errors_impl.InvalidArgumentError): + self.evaluate( + gen_rnn_ops.lstm_block_cell( + x=x, + cs_prev=cs_prev, + h_prev=h_prev, + w=w, + wci=wci, + wcf=wcf, + wco=wco, + b=b, + forget_bias=forget_bias, + cell_clip=cell_clip, + use_peephole=use_peephole)) + + @test_util.run_in_graph_and_eager_modes + def testLSTMBlockCellGradErrorHandling(self): + use_peephole = False + seq_len_max = constant_op.constant(1, shape=[], dtype=dtypes.int64) + x = constant_op.constant(0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + cs_prev = constant_op.constant( + 0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + h_prev = constant_op.constant( + 0.504355371, shape=[1, 1], dtype=dtypes.float32) + w = constant_op.constant(0.504355371, shape=[1, 1], dtype=dtypes.float32) + wci = constant_op.constant(0.504355371, shape=[1], dtype=dtypes.float32) + wcf = constant_op.constant(0.504355371, shape=[1], dtype=dtypes.float32) + wco = constant_op.constant(0.504355371, shape=[1], dtype=dtypes.float32) + b = constant_op.constant(0.504355371, shape=[1], dtype=dtypes.float32) + i = constant_op.constant(0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + cs = 
constant_op.constant( + 0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + f = constant_op.constant(0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + o = constant_op.constant(0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + ci = constant_op.constant( + 0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + co = constant_op.constant( + 0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + h = constant_op.constant(0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + cs_grad = constant_op.constant( + 0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + h_grad = constant_op.constant( + 0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError), + "must be rank"): + self.evaluate( + gen_rnn_ops.block_lstm_grad_v2( + seq_len_max=seq_len_max, + x=x, + cs_prev=cs_prev, + h_prev=h_prev, + w=w, + wci=wci, + wcf=wcf, + wco=wco, + b=b, + i=i, + cs=cs, + f=f, + o=o, + ci=ci, + co=co, + h=h, + cs_grad=cs_grad, + h_grad=h_grad, + use_peephole=use_peephole)) + class BidirectionalRNNTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/quantization_ops/BUILD b/tensorflow/python/kernel_tests/quantization_ops/BUILD new file mode 100644 index 00000000000000..ff0be9898c601d --- /dev/null +++ b/tensorflow/python/kernel_tests/quantization_ops/BUILD @@ -0,0 +1,24 @@ +# Tests of TensorFlow quantization ops written using the Python API. + +# buildifier: disable=same-origin-load +load("//tensorflow:tensorflow.bzl", "tf_py_test") + +package( + default_visibility = ["//tensorflow:internal"], + licenses = ["notice"], +) + +tf_py_test( + name = "quantization_ops_test", + size = "small", + srcs = ["quantization_ops_test.py"], + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//third_party/py/numpy", + ], +) diff --git a/tensorflow/python/kernel_tests/quantization_ops/quantization_ops_test.py b/tensorflow/python/kernel_tests/quantization_ops/quantization_ops_test.py new file mode 100644 index 00000000000000..2a89c6c2f63750 --- /dev/null +++ b/tensorflow/python/kernel_tests/quantization_ops/quantization_ops_test.py @@ -0,0 +1,466 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for tf.quantize ops.""" +import numpy as np + +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.platform import googletest + + +class FakeQuantWithMinMaxVarsOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars( + inputs=inputs, min=0.0, max=[[1.0], [2.0], [4.0]])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars( + inputs=inputs, min=[[1.0], [2.0], [4.0]], max=1.0)) + + +class FakeQuantWithMinMaxVarsPerChannelOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 1"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel( + inputs=inputs, min=[[0.0]], max=[1.0])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Dimensions must be equal|incorrect size"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel( + inputs=inputs, min=[0.0, 0.1], max=[1.0])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 1"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel( + inputs=inputs, min=[1.0], max=[[1.0]])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Dimensions must be equal|incorrect size"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel( + inputs=inputs, min=[0.0], max=[1.0, 1.1])) + + +class FakeQuantWithMinMaxVarsGradientOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + gradients = constant_op.constant( + value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32) + inputs = constant_op.constant( + value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be equal rank|must be rank 0"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_gradient( + gradients=gradients, + inputs=inputs, + min=0.0, + max=[[1.0], [2.0], [4.0]])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_gradient( + gradients=gradients, + inputs=inputs, + min=[[1.0], [2.0], [4.0]], + max=[[1.0], [2.0], [4.0]])) + + +class FakeQuantWithMinMaxVarsPerChannelGradientOpTest( + test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + gradients = constant_op.constant( + value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32) + inputs = constant_op.constant( + 
value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Shapes must be equal rank|must be rank 1"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel_gradient( + gradients=gradients, inputs=inputs, min=[[0.0]], max=[1.0])) + + with self.assertRaisesRegex( + (ValueError, errors.InvalidArgumentError), + "Dimension 0 in both shapes must be equal|incorrect size"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel_gradient( + gradients=gradients, inputs=inputs, min=[0.0, 0.1], max=[1.0])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Shapes must be equal rank|must be rank 1"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel_gradient( + gradients=gradients, inputs=inputs, min=[1.0], max=[[1.0]])) + + with self.assertRaisesRegex( + (ValueError, errors.InvalidArgumentError), + "Dimension 0 in both shapes must be equal|incorrect size"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel_gradient( + gradients=gradients, inputs=inputs, min=[0.0], max=[1.0, 1.1])) + + +class QuantizedBiasedAddTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.int8(0), shape=[3, 3, 3, 3], dtype=dtypes.qint8) + bias = constant_op.constant(np.int8(0), shape=[3], dtype=dtypes.qint8) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + nn_ops.quantized_bias_add( + input=inputs, + bias=bias, + min_input=[], + max_input=1.0, + min_bias=0.0, + max_bias=1.0, + out_type=dtypes.qint32)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + nn_ops.quantized_bias_add( + input=inputs, + bias=bias, + min_input=0.0, + max_input=[], + min_bias=0.0, + max_bias=1.0, + out_type=dtypes.qint32)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + nn_ops.quantized_bias_add( + input=inputs, + bias=bias, + min_input=0.0, + max_input=1.0, + min_bias=[], + max_bias=1.0, + out_type=dtypes.qint32)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + nn_ops.quantized_bias_add( + input=inputs, + bias=bias, + min_input=0.0, + max_input=1.0, + min_bias=0.0, + max_bias=[], + out_type=dtypes.qint32)) + + +class QuantizedInstanceNormOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.uint8(0), shape=[3, 3, 3, 3], dtype=dtypes.quint8) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + array_ops.quantized_instance_norm( + x=inputs, x_min=0.0, x_max=[[1.0], [2.0], [4.0]])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + array_ops.quantized_instance_norm( + x=inputs, x_min=[[1.0], [2.0], [4.0]], x_max=1.0)) + + +class QuantizedAvgPoolingOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.uint8(0), shape=[3, 3, 3, 3], dtype=dtypes.quint8) + ksize = [1, 1, 1, 1] + strides = [1, 1, 1, 1] + padding = "SAME" + + with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError), + "must be.* rank 0"): + 
self.evaluate( + nn_ops.quantized_avg_pool( + input=inputs, + min_input=[], + max_input=1.0, + ksize=ksize, + strides=strides, + padding=padding)) + + with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError), + "must be.* rank 0"): + self.evaluate( + nn_ops.quantized_avg_pool( + input=inputs, + min_input=0.0, + max_input=[], + ksize=ksize, + strides=strides, + padding=padding)) + + +class QuantizedMaxPoolingOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.uint8(0), shape=[3, 3, 3, 3], dtype=dtypes.quint8) + ksize = [1, 1, 1, 1] + strides = [1, 1, 1, 1] + padding = "SAME" + + with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError), + "must be.* rank 0"): + self.evaluate( + nn_ops.quantized_max_pool( + input=inputs, + min_input=[], + max_input=1.0, + ksize=ksize, + strides=strides, + padding=padding)) + + with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError), + "must be.* rank 0"): + self.evaluate( + nn_ops.quantized_max_pool( + input=inputs, + min_input=0.0, + max_input=[], + ksize=ksize, + strides=strides, + padding=padding)) + + +class RequantizeOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.int32(0), shape=[3, 3, 3, 3], dtype=dtypes.qint32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + math_ops.requantize( + input=inputs, + input_min=[], + input_max=1.0, + requested_output_min=0.0, + requested_output_max=1.0, + out_type=dtypes.qint8)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + math_ops.requantize( + input=inputs, + input_min=0.0, + input_max=[], + requested_output_min=0.0, + requested_output_max=1.0, + out_type=dtypes.qint8)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + math_ops.requantize( + input=inputs, + input_min=0.0, + input_max=1.0, + requested_output_min=[], + requested_output_max=1.0, + out_type=dtypes.qint8)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + math_ops.requantize( + input=inputs, + input_min=0.0, + input_max=1.0, + requested_output_min=0.0, + requested_output_max=[], + out_type=dtypes.qint8)) + + +class QuantizedAddOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + x = constant_op.constant( + np.int8(0), shape=[3, 3, 3, 3], dtype=dtypes.quint8) + y = constant_op.constant(np.int8(0), shape=[3], dtype=dtypes.quint8) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + math_ops.quantized_add( + x=x, + y=y, + min_x=[], + max_x=1.0, + min_y=0.0, + max_y=1.0, + Toutput=dtypes.qint32)) + + +class QuantizedReluOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.int8(0), shape=[3, 3, 3, 3], dtype=dtypes.quint8) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + nn_ops.quantized_relu( + features=inputs, + min_features=[], + max_features=127.0, + out_type=dtypes.quint8)) + + +class QuantizedRelu6OpTest(test_util.TensorFlowTestCase): + + 
@test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.int8(0), shape=[3, 3, 3, 3], dtype=dtypes.quint8) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + nn_ops.quantized_relu6( + features=inputs, + min_features=[], + max_features=127.0, + out_type=dtypes.quint8)) + + +class QuantizeAndDequantizeV3OpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_valid(self): + with ops.Graph().as_default(), context.eager_mode(): + input_value = constant_op.constant([-0.8, -0.5, 0, 0.3, 0.8, -2.0], + shape=(6,), + dtype=dtypes.float32), + input_min = constant_op.constant(-127, shape=(), dtype=dtypes.float32) + input_max = constant_op.constant(127, shape=(), dtype=dtypes.float32) + num_bits = constant_op.constant(8, shape=(), dtype=dtypes.int32) + + quantized = array_ops.quantize_and_dequantize_v3( + input_value, + input_min, + input_max, + num_bits, + signed_input=True, + range_given=False) + self.assertSequenceAlmostEqual( + input_value[0].numpy(), quantized.numpy()[0], delta=0.05) + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + input_value = constant_op.constant([-0.8, -0.5, 0, 0.3, 0.8, -2.0], + shape=(6,), + dtype=dtypes.float32), + input_min = constant_op.constant(-127, shape=(), dtype=dtypes.float32) + input_max = constant_op.constant(127, shape=(), dtype=dtypes.float32) + # Tensor with invalid shape and invalid number of elements. + num_bits = constant_op.constant([], shape=(0,), dtype=dtypes.int32) + + # Test that running the op raises error. It raises different errors + # depending on whether the shape inference is run first or the op's + # Compute() is run first. + try: + array_ops.quantize_and_dequantize_v3( + input_value, input_min, input_max, num_bits, signed_input=True) + except Exception as ex: # pylint: disable=broad-except + if isinstance(ex, errors.InvalidArgumentError): + self.assertRegex(str(ex), "The `num_bits` tensor should be a scalar.") + elif isinstance(ex, ValueError): + self.assertRegex(str(ex), "Shape must be rank 0") + else: + self.fail( + "Raised exception other than expected: %s. 
" + "Expected exceptions are errors.InvalidArgumentError or ValueError", + ex.__name__) + else: + self.fail( + "Did not raise an exception where it is expected to raise either " + "a ValueError or errors.InvalidArgumentError.") +class QuantizeDownAndShrinkRangeOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.int32(0), shape=[3, 3, 3, 3], dtype=dtypes.qint32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + math_ops.quantize_down_and_shrink_range(input=inputs, + input_min=[], + input_max=4.0, + out_type=dtypes.quint8)) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/kernel_tests/random/candidate_sampler_ops_test.py b/tensorflow/python/kernel_tests/random/candidate_sampler_ops_test.py index b70a30f46062c5..396843ace3ae0d 100644 --- a/tensorflow/python/kernel_tests/random/candidate_sampler_ops_test.py +++ b/tensorflow/python/kernel_tests/random/candidate_sampler_ops_test.py @@ -18,6 +18,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import candidate_sampling_ops @@ -127,6 +128,27 @@ def draw(seed): # twice very rarely. self.assertLessEqual(num_same, 2) + def testCandidateOutOfRange(self): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "out of range"): + self.evaluate( + candidate_sampling_ops.log_uniform_candidate_sampler( + true_classes=[[0, 10]], + num_true=2, + num_sampled=1000, + unique=False, + range_max=2)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "out of range"): + self.evaluate( + candidate_sampling_ops.log_uniform_candidate_sampler( + true_classes=[[0, -10]], + num_true=2, + num_sampled=1000, + unique=False, + range_max=2)) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/random/parameterized_truncated_normal_op_test.py b/tensorflow/python/kernel_tests/random/parameterized_truncated_normal_op_test.py index a1c6072abfda49..8ad859230edcc6 100644 --- a/tensorflow/python/kernel_tests/random/parameterized_truncated_normal_op_test.py +++ b/tensorflow/python/kernel_tests/random/parameterized_truncated_normal_op_test.py @@ -303,6 +303,29 @@ def testSamplingWithSmallStdDevFarFromBound(self): self.assertAllGreater(samples, 0.) self.assertAllGreater(samples_stateless, 0.) + def testShapeTypes(self): + for shape_dtype in [np.int32, np.int64]: + shape = np.array([1000], dtype=shape_dtype) + sample_op = random_ops.parameterized_truncated_normal( + shape=shape, means=0.0, stddevs=0.1, minvals=-1., maxvals=1.) + new_seed = random_ops.random_uniform([2], + seed=1234, + minval=0, + maxval=(2**31 - 1), + dtype=np.int32) + sample_op_stateless = stateless.stateless_parameterized_truncated_normal( + shape=shape, + seed=new_seed, + means=0.0, + stddevs=0.1, + minvals=-1., + maxvals=1.) + + samples = self.evaluate(sample_op) + stateless_samples = self.evaluate(sample_op_stateless) + self.assertAllEqual(samples.shape, shape) + self.assertAllEqual(stateless_samples.shape, shape) + def testStatelessParameterizedTruncatedNormalHasGrads(self): mean = variables.Variable(0.01) stddev = variables.Variable(1.) 
diff --git a/tensorflow/python/kernel_tests/random/random_gamma_test.py b/tensorflow/python/kernel_tests/random/random_gamma_test.py index 71b06da183f332..4d3ea4fe5284e6 100644 --- a/tensorflow/python/kernel_tests/random/random_gamma_test.py +++ b/tensorflow/python/kernel_tests/random/random_gamma_test.py @@ -16,7 +16,10 @@ import numpy as np +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import test_util @@ -216,6 +219,16 @@ def testPositive(self): self.assertEqual(0, math_ops.reduce_sum(math_ops.cast( math_ops.less_equal(x, 0.), dtype=dtypes.int64)).eval()) + def testSizeTooLarge(self): + # Grappler asserts on size overflow, so this error is only caught when + # running eagerly. + if context.executing_eagerly(): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "overflow"): + rate = constant_op.constant(1.0, shape=(4, 4, 4, 4, 4)) + self.evaluate( + random_ops.random_gamma( + shape=[46902, 51188, 34063, 59195], alpha=rate)) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/random/random_poisson_test.py b/tensorflow/python/kernel_tests/random/random_poisson_test.py index 9f21f91ed5728f..c0470e6029aa99 100644 --- a/tensorflow/python/kernel_tests/random/random_poisson_test.py +++ b/tensorflow/python/kernel_tests/random/random_poisson_test.py @@ -17,6 +17,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.kernel_tests.random import util @@ -171,6 +172,14 @@ def testInfRate(self): sample = random_ops.random_poisson(shape=[2], lam=np.inf) self.assertAllEqual([np.inf, np.inf], self.evaluate(sample)) + def testSizeTooLarge(self): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "overflow"): + rate = constant_op.constant(1.0, shape=(4, 4, 4, 4, 4)) + self.evaluate( + random_ops.random_poisson( + shape=[46902, 51188, 34063, 59195], lam=rate)) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/signal/fft_ops_test.py b/tensorflow/python/kernel_tests/signal/fft_ops_test.py index a0e2b438a66ee4..67f8a47cef2d9e 100644 --- a/tensorflow/python/kernel_tests/signal/fft_ops_test.py +++ b/tensorflow/python/kernel_tests/signal/fft_ops_test.py @@ -609,6 +609,15 @@ def test_grad_random(self, rank, extra_dims, size, np_rtype): self._tf_ifft_for_rank(rank), re, im, result_is_complex=False, rtol=tol, atol=tol) + def test_invalid_args(self): + # Test case for GitHub issue 55263 + a = np.empty([6, 0]) + b = np.array([1, -1]) + with self.assertRaisesRegex(errors.InvalidArgumentError, "must >= 0"): + with self.session(): + v = fft_ops.rfft2d(input_tensor=a, fft_length=b) + self.evaluate(v) + @test_util.run_all_in_graph_and_eager_modes class FFTShiftTest(test.TestCase, parameterized.TestCase): diff --git a/tensorflow/python/kernel_tests/sparse_ops/BUILD b/tensorflow/python/kernel_tests/sparse_ops/BUILD index e07724a94a284c..52236a913506b6 100644 --- a/tensorflow/python/kernel_tests/sparse_ops/BUILD +++ b/tensorflow/python/kernel_tests/sparse_ops/BUILD @@ -261,7 +261,10 @@ cuda_py_test( name = 
"sparse_xent_op_deterministic_test", size = "medium", srcs = ["sparse_xent_op_deterministic_test.py"], - tags = ["no_windows"], # Fails as SegmentSum is nondeterministic on Windows + tags = [ + "no_windows", # Fails as SegmentSum is nondeterministic on Windows + "no_oss", # TODO(b/258503209): Disable the test. + ], xla_enable_strict_auto_jit = False, deps = [ ":sparse_xent_op_test_base", diff --git a/tensorflow/python/kernel_tests/sparse_ops/sparse_add_op_test.py b/tensorflow/python/kernel_tests/sparse_ops/sparse_add_op_test.py index 61ad45fb5e273e..821184af5b5699 100644 --- a/tensorflow/python/kernel_tests/sparse_ops/sparse_add_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_ops/sparse_add_op_test.py @@ -189,7 +189,6 @@ def testSparseTensorDenseAddGradients(self): [(nnz,), (n, m)], s, (n, m)) self.assertLess(err, 1e-3) - @test_util.run_deprecated_v1 def testInvalidSparseTensor(self): with test_util.force_cpu(): shape = [2, 2] @@ -201,12 +200,49 @@ def testInvalidSparseTensor(self): [[1, 3]], # ...so is 3. ]: sparse = sparse_tensor.SparseTensorValue(bad_idx, val, shape) - s = sparse_ops.sparse_add(sparse, dense) - - with self.assertRaisesRegex(errors_impl.InvalidArgumentError, - "invalid index"): + with self.assertRaisesRegex( + (ValueError, errors_impl.InvalidArgumentError), "invalid index"): + s = sparse_ops.sparse_add(sparse, dense) self.evaluate(s) + def _testSparseDenseInvalidInputs(self, + a_indices, + a_values, + a_shape, + b, + expected_error=""): + # Public API call to sparse-dense add. + with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError), + expected_error): + a = sparse_tensor.SparseTensor(a_indices, a_values, a_shape) + self.evaluate(sparse_ops.sparse_add(a, b)) + # Directly call generated kernel, by-passing SparseTensor validation. 
+ with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError), + expected_error): + self.evaluate( + sparse_ops.gen_sparse_ops.sparse_tensor_dense_add( + a_indices, a_values, a_shape, b)) + + def testSparseDenseInvalidInputs(self): + self._testSparseDenseInvalidInputs( + a_indices=constant_op.constant(0, shape=[17, 2], dtype=dtypes.int64), + a_values=constant_op.constant(0, shape=[5], dtype=dtypes.float32), + a_shape=constant_op.constant([3, 4], dtype=dtypes.int64), + b=constant_op.constant(1, shape=[3, 4], dtype=dtypes.float32), + expected_error="Dimensions 17 and 5 are not compatible") + self._testSparseDenseInvalidInputs( + a_indices=constant_op.constant(0, shape=[17, 4], dtype=dtypes.int64), + a_values=constant_op.constant(0, shape=[17], dtype=dtypes.float32), + a_shape=constant_op.constant([3, 4], dtype=dtypes.int64), + b=constant_op.constant(1, shape=[3, 4], dtype=dtypes.float32), + expected_error="Dimensions 4 and 2 are not compatible") + self._testSparseDenseInvalidInputs( + a_indices=constant_op.constant(7, shape=[17, 2], dtype=dtypes.int64), + a_values=constant_op.constant(0, shape=[17], dtype=dtypes.float32), + a_shape=constant_op.constant([3, 4], dtype=dtypes.int64), + b=constant_op.constant(1, shape=[3, 4], dtype=dtypes.float32), + expected_error="invalid index") + ######################## Benchmarking code diff --git a/tensorflow/python/kernel_tests/sparse_ops/sparse_cross_op_test.py b/tensorflow/python/kernel_tests/sparse_ops/sparse_cross_op_test.py index 28a133d982feaa..94f170454d7b5f 100644 --- a/tensorflow/python/kernel_tests/sparse_ops/sparse_cross_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_ops/sparse_cross_op_test.py @@ -873,6 +873,14 @@ def test_all_columns_empty(self): with self.cached_session(): self._assert_sparse_tensor_empty(self.evaluate(out)) + def testNonScalarInput(self): + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'Input separator should be a scalar.'): + self.evaluate(sparse_ops.sparse_cross( + inputs=[], + name='a', + separator=constant_op.constant(['a', 'b'], dtype=dtypes.string))) + class SparseCrossHashedOpTest(BaseSparseCrossOpTest): diff --git a/tensorflow/python/kernel_tests/sparse_ops/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops/sparse_ops_test.py index 4972d1d25f08f1..c1ceac68040318 100644 --- a/tensorflow/python/kernel_tests/sparse_ops/sparse_ops_test.py +++ b/tensorflow/python/kernel_tests/sparse_ops/sparse_ops_test.py @@ -514,6 +514,13 @@ def testFillNumber(self): self.assertAllEqual(empty_row_indicator_out, np.array([0, 0, 1, 0, 1]).astype(np.bool_)) + def testSparseFillEmptyRowsGradEmpty(self): + with test_util.use_gpu(): + grad, _ = self.evaluate( + sparse_ops.sparse_fill_empty_rows_grad( + reverse_index_map=[], grad_values=[])) + self.assertAllEqual(grad, []) + @test_util.run_deprecated_v1 def testFillFloat(self): with self.session(): @@ -665,7 +672,7 @@ def testInvalidIndices(self): class SparseAddTest(test_util.TensorFlowTestCase): def testValuesInVariable(self): - indices = constant_op.constant([[1]], dtype=dtypes.int64) + indices = constant_op.constant([[0]], dtype=dtypes.int64) values = variables.Variable([1], trainable=False, dtype=dtypes.float32) shape = constant_op.constant([1], dtype=dtypes.int64) diff --git a/tensorflow/python/kernel_tests/summary_ops/summary_ops_test.py b/tensorflow/python/kernel_tests/summary_ops/summary_ops_test.py index 2e422ab734bdc8..18a3d1da706fbe 100644 --- a/tensorflow/python/kernel_tests/summary_ops/summary_ops_test.py +++ 
b/tensorflow/python/kernel_tests/summary_ops/summary_ops_test.py @@ -976,10 +976,12 @@ def testFlushFunction(self): self.assertEqual(3, get_total()) summary_ops.flush(writer=writer) self.assertEqual(4, get_total()) - summary_ops.write('tag', 1, step=0) - self.assertEqual(4, get_total()) - summary_ops.flush(writer=writer._resource) # pylint:disable=protected-access - self.assertEqual(5, get_total()) + + # Regression test for b/228097117. + def testFlushFunction_disallowsInvalidWriterInput(self): + with context.eager_mode(): + with self.assertRaisesRegex(ValueError, 'Invalid argument to flush'): + summary_ops.flush(writer=()) @test_util.assert_no_new_tensors def testNoMemoryLeak_graphMode(self): diff --git a/tensorflow/python/lib/core/BUILD b/tensorflow/python/lib/core/BUILD index 4755454a4e5fa9..54654b7eb43bbf 100644 --- a/tensorflow/python/lib/core/BUILD +++ b/tensorflow/python/lib/core/BUILD @@ -82,6 +82,7 @@ cc_library( deps = [ ":bfloat16_lib", ":numpy_lib", + ":py_util", "//tensorflow/c:c_api_no_xla", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/python/lib/core/ndarray_tensor_bridge.cc b/tensorflow/python/lib/core/ndarray_tensor_bridge.cc index 03ff77100d2e45..7b78a1ce180baf 100644 --- a/tensorflow/python/lib/core/ndarray_tensor_bridge.cc +++ b/tensorflow/python/lib/core/ndarray_tensor_bridge.cc @@ -13,8 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +// clang-format off // Must be included first. #include "tensorflow/python/lib/core/numpy.h" +// clang-format on + +#include "tensorflow/python/lib/core/ndarray_tensor_bridge.h" #include @@ -22,7 +26,7 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/python/lib/core/bfloat16.h" -#include "tensorflow/python/lib/core/ndarray_tensor_bridge.h" +#include "tensorflow/python/lib/core/py_util.h" namespace tensorflow { @@ -200,8 +204,28 @@ Status ArrayFromMemory(int dim_size, npy_intp* dims, void* data, DataType dtype, return s; } + if (dim_size > NPY_MAXDIMS) { + return errors::InvalidArgument( + "Cannot convert tensor with ", dim_size, + " dimensions to NumPy array. NumPy arrays can have at most ", + NPY_MAXDIMS, " dimensions"); + } auto* np_array = reinterpret_cast( PyArray_SimpleNewFromData(dim_size, dims, type_num, data)); + if (np_array == nullptr) { + string shape_str = absl::StrJoin( + absl::Span{dims, static_cast(dim_size)}, ", "); + if (PyErr_Occurred()) { + string exception_str = PyExceptionFetch(); + PyErr_Clear(); + return errors::InvalidArgument( + "Failed to create numpy array from tensor of shape [", shape_str, + "]. 
Numpy error: ", exception_str); + } + return errors::Internal( + "Failed to create numpy array from tensor of shape [", shape_str, "]"); + } + PyArray_CLEARFLAGS(np_array, NPY_ARRAY_OWNDATA); if (PyType_Ready(&TensorReleaserType) == -1) { return errors::Unknown("Python type initialization failed."); diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index d0e3440bbe9bdf..5372ed03869a2a 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -83,8 +83,8 @@ bool IsCPUDevice(const Device* d) { return d == nullptr || d->tensorflow_gpu_device_info() == nullptr; } -// Givens the 'call', prepares the token and inputs as a python tuple -// that is appropriate for calling the trampoline. +// Given the 'call', prepares the token and inputs as a python tuple that is +// appropriate for calling the trampoline. Status MakeArgTuple(const PyCall* call, TFE_Context* ctx, PyObject** tuple) { int64_t n = call->ins.size(); PyObject* lst = PyList_New(n); @@ -119,7 +119,11 @@ Status MakeArgTuple(const PyCall* call, TFE_Context* ctx, PyObject** tuple) { PyList_SetItem(lst, i, arg); } *tuple = Py_BuildValue("(ssN)", call->token.c_str(), device_name, lst); - CHECK(*tuple); + if (*tuple == nullptr) { + return errors::Internal( + "Failed to create python tuple. Please make sure `token` is a " + "well-formed UTF-8 string."); + } return Status::OK(); } diff --git a/tensorflow/python/ops/batch_ops_test.py b/tensorflow/python/ops/batch_ops_test.py index 7ef5e06fb6757d..15a1a71a93bb63 100644 --- a/tensorflow/python/ops/batch_ops_test.py +++ b/tensorflow/python/ops/batch_ops_test.py @@ -20,7 +20,9 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.framework import test_util @@ -30,6 +32,7 @@ from tensorflow.python.ops import gen_batch_ops from tensorflow.python.ops import gen_functional_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import script_ops from tensorflow.python.ops import variables @@ -233,6 +236,26 @@ def worker(): self.assertEqual(thread_results[0], [2]) self.assertEqual(main_results[0], [3]) + def testUnbatchInvalidIdArg(self): + """Tests that unbatch work together.""" + if context.executing_eagerly(): + batched_tensor = constant_op.constant( + value=np.random.random(size=(3, 3, 1)), dtype=dtypes.float64) + batched_index = constant_op.constant( + value=np.random.randint(0, 100, size=(3, 3, 1)), dtype=dtypes.int64) + arg_id = constant_op.constant( + value=np.random.randint(0, 100, size=(3, 3, 1)), dtype=dtypes.int64) + + with self.assertRaisesRegex(errors.InvalidArgumentError, + "Input id should be scalar;"): + batch_ops.unbatch( + batched_tensor=batched_tensor, + batch_index=batched_index, + id=arg_id, + timeout_micros=50, + container="", + shared_name="") + def testBatchDecoratedWithCapturedInput(self): """Tests that the batch_function decorator works.""" if context.executing_eagerly(): @@ -557,6 +580,56 @@ def worker(): # The thread's call should hit the timeout, and thus get 0 results. 
self.assertEqual(len(thread_results), 0) + def testUnbatchGradInvalidId(self): + with self.assertRaises(errors.InvalidArgumentError): + self.evaluate( + gen_batch_ops.unbatch_grad( + original_input=constant_op.constant([1]), + batch_index=constant_op.constant([ + [0, 0, 0], + ], dtype=dtypes.int64), + grad=constant_op.constant([ + 1, + ]), + id=constant_op.constant([ + 1, + 1, + ], dtype=dtypes.int64))) + + def testUnbatchGradInvalidBatchId(self): + with self.assertRaises(errors.InvalidArgumentError): + self.evaluate( + gen_batch_ops.unbatch_grad( + original_input=constant_op.constant([1]), + batch_index=constant_op.constant([ + [0, 0], + ], dtype=dtypes.int64), + grad=constant_op.constant([ + 1, + ]), + id=constant_op.constant([ + 1, + ], dtype=dtypes.int64))) + + def testUnbatchGradInvalidArgs(self): + original_input = random_ops.random_uniform( + shape=(3, 1), dtype=dtypes.float64, maxval=None) + batch_index = random_ops.random_uniform( + shape=(3, 1), dtype=dtypes.int64, maxval=65536) + grad = random_ops.random_uniform( + shape=(3, 1), dtype=dtypes.float64, maxval=None) + batch_id = random_ops.random_uniform( + shape=(3, 1), dtype=dtypes.int64, maxval=65536) + with self.assertRaises(errors.InvalidArgumentError): + self.evaluate( + gen_batch_ops.unbatch_grad( + original_input=original_input, + batch_index=batch_index, + grad=grad, + id=batch_id, + container="", + shared_name="", + name="")) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/collective_ops_test.py b/tensorflow/python/ops/collective_ops_test.py index a95896c0a23a07..5b57956996b8ac 100644 --- a/tensorflow/python/ops/collective_ops_test.py +++ b/tensorflow/python/ops/collective_ops_test.py @@ -451,6 +451,20 @@ def testCollectiveGroupSizeMismatch(self): ]) context.ensure_initialized() + @test_util.run_v2_only + def testCollectiveGatherShapeCheckFailure(self): + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'input should have rank > 0'): + collective_ops.gen_collective_ops.CollectiveGather( + input=1, + group_size=1, + group_key=1, + instance_key=1, + shape=(3, 3, 3), + communication_hint='auto', + timeout_seconds=0, + name='') + @def_function.function def run_all_reduce(): group_key = 10 diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py index ed3b37df3b4b4f..ce3d44c045f8db 100644 --- a/tensorflow/python/ops/data_flow_ops.py +++ b/tensorflow/python/ops/data_flow_ops.py @@ -1738,7 +1738,7 @@ def _check_put_dtypes(self, vals, indices=None): # Sanity check number of values if not len(vals) <= len(self._dtypes): - raise ValueError(f"Unexpected number of inputs {len(vals)} vs" + raise ValueError(f"Unexpected number of inputs {len(vals)} vs " f"{len(self._dtypes)}") tensors = [] diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 05534c338b58c1..405facf51d94e9 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -2304,6 +2304,29 @@ def testInvalidInput(self): self.evaluate(v) +class ImageProjectiveTransformV2(test_util.TensorFlowTestCase): + + def testShapeTooLarge(self): + interpolation = "BILINEAR" + fill_mode = "REFLECT" + images = constant_op.constant( + 0.184634328, shape=[2, 5, 8, 3], dtype=dtypes.float32) + transforms = constant_op.constant( + 0.378575385, shape=[2, 8], dtype=dtypes.float32) + output_shape = constant_op.constant([1879048192, 1879048192], + shape=[2], + dtype=dtypes.int32) + with self.assertRaisesRegex(errors.InvalidArgumentError, + 
r"Encountered overflow when multiplying"): + self.evaluate( + gen_image_ops.ImageProjectiveTransformV2( + images=images, + transforms=transforms, + output_shape=output_shape, + interpolation=interpolation, + fill_mode=fill_mode)) + + class SelectDistortedCropBoxTest(test_util.TensorFlowTestCase): def _testSampleDistortedBoundingBox(self, image, bounding_box, @@ -4005,6 +4028,25 @@ def testPad(self): self._assertReturns(x, x_shape, y, y_shape) +class ResizeNearestNeighborGrad(test_util.TensorFlowTestCase): + + def testSizeTooLarge(self): + align_corners = True + half_pixel_centers = False + grads = constant_op.constant(1, shape=[1, 8, 16, 3], dtype=dtypes.float16) + size = constant_op.constant([1879048192, 1879048192], + shape=[2], + dtype=dtypes.int32) + with self.assertRaisesRegex(errors.InvalidArgumentError, + r"Encountered overflow when multiplying"): + self.evaluate( + gen_image_ops.ResizeNearestNeighborGrad( + grads=grads, + size=size, + align_corners=align_corners, + half_pixel_centers=half_pixel_centers)) + + class ResizeImageWithCropOrPadTest(test_util.TensorFlowTestCase): def _ResizeImageWithCropOrPad(self, x, target_height, target_width, diff --git a/tensorflow/python/ops/quantized_conv_ops_test.py b/tensorflow/python/ops/quantized_conv_ops_test.py index 2d485d0ecb2568..971abf339f75bf 100644 --- a/tensorflow/python/ops/quantized_conv_ops_test.py +++ b/tensorflow/python/ops/quantized_conv_ops_test.py @@ -18,6 +18,8 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.platform import test @@ -196,6 +198,71 @@ def testConv2D2x2FilterStride2Same(self): padding="SAME", expected=expected_output) + def _testBadInputSize(self, + tin=None, + tfilter=None, + min_input=None, + max_input=None, + min_filter=None, + max_filter=None, + error_regex=""): + strides = [1, 1, 1, 1] + padding = "SAME" + if tin is None: + tin = math_ops.cast( + constant_op.constant(1, shape=[1, 2, 3, 3]), dtype=dtypes.quint8) + + if tfilter is None: + tfilter = math_ops.cast( + constant_op.constant(1, shape=[1, 2, 3, 3]), dtype=dtypes.quint8) + + if min_input is None: + min_input = constant_op.constant(0, shape=[], dtype=dtypes.float32) + + if max_input is None: + max_input = constant_op.constant(0, shape=[], dtype=dtypes.float32) + + if min_filter is None: + min_filter = constant_op.constant(0, shape=[], dtype=dtypes.float32) + + if max_filter is None: + max_filter = constant_op.constant(0, shape=[], dtype=dtypes.float32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + error_regex): + self.evaluate( + nn_ops.quantized_conv2d( + tin, + tfilter, + out_type=dtypes.qint32, + strides=strides, + padding=padding, + min_input=min_input, + max_input=max_input, + min_filter=min_filter, + max_filter=max_filter)) + + def testBadInputSizes(self): + self._testBadInputSize( + tin=math_ops.cast( + constant_op.constant(1, shape=[1, 2]), dtype=dtypes.quint8), + error_regex="must be rank 4") + self._testBadInputSize( + tfilter=math_ops.cast( + constant_op.constant(1, shape=[1, 2]), dtype=dtypes.quint8), + error_regex="must be rank 4") + self._testBadInputSize( + min_input=constant_op.constant(0, shape=[1], dtype=dtypes.float32), + error_regex="must be rank 0") + self._testBadInputSize( + max_input=constant_op.constant(0, shape=[1], dtype=dtypes.float32), + error_regex="must be rank 0") + 
self._testBadInputSize( + min_filter=constant_op.constant(0, shape=[1], dtype=dtypes.float32), + error_regex="must be rank 0") + self._testBadInputSize( + max_filter=constant_op.constant(0, shape=[1], dtype=dtypes.float32), + error_regex="must be rank 0") if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/ragged/ragged_factory_ops.py b/tensorflow/python/ops/ragged/ragged_factory_ops.py index a1906c469beb46..457b3a04618a81 100644 --- a/tensorflow/python/ops/ragged/ragged_factory_ops.py +++ b/tensorflow/python/ops/ragged/ragged_factory_ops.py @@ -188,6 +188,9 @@ def _constant_value(ragged_factory, inner_factory, pylist, dtype, ragged_rank, if max_depth > scalar_depth: raise ValueError("Invalid pylist=%r: empty list nesting is greater " "than scalar value nesting" % pylist) + if ragged_rank is not None and max_depth < ragged_rank: + raise ValueError(f"Invalid pylist={pylist}, max depth smaller than " + f"ragged_rank={ragged_rank}") # If both inner_shape and ragged_rank were specified, then check that # they are compatible with pylist. diff --git a/tensorflow/python/ops/ragged/ragged_range_op_test.py b/tensorflow/python/ops/ragged/ragged_range_op_test.py index 8465fb25997b8b..aaf48bf9783785 100644 --- a/tensorflow/python/ops/ragged/ragged_range_op_test.py +++ b/tensorflow/python/ops/ragged/ragged_range_op_test.py @@ -84,8 +84,7 @@ def testBroadcast(self): list(range(5, 15, 3))]) # Broadcast all arguments. - self.assertAllEqual( - ragged_math_ops.range(0, 5, 1), [list(range(0, 5, 1))]) + self.assertAllEqual(ragged_math_ops.range(0, 5, 1), [list(range(0, 5, 1))]) def testEmptyRanges(self): rt1 = ragged_math_ops.range([0, 5, 3], [0, 3, 5]) @@ -108,6 +107,10 @@ def testKernelErrors(self): r'Requires delta != 0'): self.evaluate(ragged_math_ops.range(0, 0, 0)) + with self.assertRaisesRegex(errors.InvalidArgumentError, + r'Requires \(\(limit - start\) / delta\) <='): + self.evaluate(ragged_math_ops.range(0.1, 1e10, 1e-10)) + def testShape(self): self.assertAllEqual( ragged_math_ops.range(0, 0, 1).shape.as_list(), [1, None]) diff --git a/tensorflow/python/ops/ragged/ragged_tensor_test.py b/tensorflow/python/ops/ragged/ragged_tensor_test.py index b48f26e6ca4372..f553de46a64f86 100644 --- a/tensorflow/python/ops/ragged/ragged_tensor_test.py +++ b/tensorflow/python/ops/ragged/ragged_tensor_test.py @@ -1442,6 +1442,21 @@ def testUnbatchVariantInDataset(self): for i in range(3): self.assertAllEqual(sess.run(rt[i]), out) + def testToVariantInvalidParams(self): + self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r'be rank 1 but is rank 0', + gen_ragged_conversion_ops.ragged_tensor_to_variant, + rt_nested_splits=[0, 1, 2], + rt_dense_values=[0, 1, 2], + batched_input=True) + + self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r'be rank 1 but is rank 2', + gen_ragged_conversion_ops.ragged_tensor_to_variant, + rt_nested_splits=[[[0]], [[1]], [[2]]], + rt_dense_values=[0, 1, 2], + batched_input=True) + def testFromVariantInvalidParams(self): rt = ragged_factory_ops.constant([[0], [1], [2], [3]]) batched_variant = rt._to_variant(batched_input=True) diff --git a/tensorflow/python/ops/script_ops_test.py b/tensorflow/python/ops/script_ops_test.py index ebb54228347397..03ae644f0d0769 100644 --- a/tensorflow/python/ops/script_ops_test.py +++ b/tensorflow/python/ops/script_ops_test.py @@ -15,8 +15,11 @@ """Tests for script operations.""" from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from 
tensorflow.python.framework import test_util from tensorflow.python.framework import constant_op +from tensorflow.python.ops import gen_script_ops +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import script_ops from tensorflow.python.platform import test @@ -33,6 +36,82 @@ def plus(a, b): expect_result = constant_op.constant(3, dtypes.int32) self.assertAllEqual(actual_result, expect_result) + def test_stateless(self): + call_count = 0 + + def plus(a, b): + nonlocal call_count + call_count += 1 + return a + b + + @def_function.function + def numpy_func_stateless(a, b): + return numpy_function(plus, [a, b], dtypes.int32, stateful=False) + + @def_function.function + def func_stateless(a, b): + sum1 = numpy_func_stateless(a, b) + sum2 = numpy_func_stateless(a, b) + return sum1 + sum2 + + self.evaluate(func_stateless( + constant_op.constant(1), + constant_op.constant(2), + )) + + self.assertIn(call_count, (1, 2)) # as stateless, func may be deduplicated + + def test_stateful(self): + call_count = 0 + + def plus(a, b): + nonlocal call_count + call_count += 1 + return a + b + + @def_function.function + def numpy_func_stateful(a, b): + return numpy_function(plus, [a, b], dtypes.int32, stateful=True) + + @def_function.function + def func_stateful(a, b): + sum1 = numpy_func_stateful(a, b) + sum2 = numpy_func_stateful(a, b) + return sum1 + sum2 + + self.evaluate(func_stateful( + constant_op.constant(1), + constant_op.constant(2), + )) + + self.assertEqual(call_count, + 2) # as stateful, func is guaranteed to execute twice + + +class PyFunctionTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes + def test_variable_arguments(self): + + def plus(a, b): + return a + b + + v1 = resource_variable_ops.ResourceVariable(1) + self.evaluate(v1.initializer) + + actual_result = script_ops.eager_py_func(plus, [v1, 2], dtypes.int32) + expect_result = constant_op.constant(3, dtypes.int32) + self.assertAllEqual(actual_result, expect_result) + + @test_util.run_in_graph_and_eager_modes + def test_fail_on_non_utf8_token(self): + value = constant_op.constant(value=[1, 2]) + token = b"\xb0" + data_type = [dtypes.int32] + with self.assertRaises((errors.InternalError, UnicodeDecodeError)): + self.evaluate( + gen_script_ops.py_func(input=[value], token=token, Tout=data_type)) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/sobol_ops_test.py b/tensorflow/python/ops/sobol_ops_test.py index f026e4434fae7c..24abf790704ae0 100644 --- a/tensorflow/python/ops/sobol_ops_test.py +++ b/tensorflow/python/ops/sobol_ops_test.py @@ -16,9 +16,12 @@ import numpy as np from tensorflow.python.eager import def_function +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest @@ -126,5 +129,15 @@ def test_default_dtype(self): s = math_ops.sobol_sample(10, 100) self.assertEqual(dtypes.float32, s.dtype) + @test_util.run_in_graph_and_eager_modes + def test_non_scalar_input(self): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r'Shape must be rank 0 but is rank 1|' + r'\w+ must be a scalar'): + self.evaluate(gen_math_ops.sobol_sample( + dim=7, + num_results=constant_op.constant([1, 0]), + 
skip=constant_op.constant([1]))) + if __name__ == '__main__': googletest.main() diff --git a/tensorflow/python/ops/summary_ops_v2.py b/tensorflow/python/ops/summary_ops_v2.py index e65cf43cdacb17..29b9c5ff2cab7d 100644 --- a/tensorflow/python/ops/summary_ops_v2.py +++ b/tensorflow/python/ops/summary_ops_v2.py @@ -1109,12 +1109,35 @@ def flush(writer=None, name=None): Returns: The created `tf.Operation`. """ + del name # unused if writer is None: writer = _summary_state.writer if writer is None: return control_flow_ops.no_op() if isinstance(writer, SummaryWriter): return writer.flush() + raise ValueError("Invalid argument to flush(): %r" % (writer,)) + + +def legacy_raw_flush(writer=None, name=None): + """Legacy version of flush() that accepts a raw resource tensor for `writer`. + + Do not use this function in any new code. Not supported and not part of the + public TF APIs. + + Args: + writer: The `tf.summary.SummaryWriter` to flush. If None, the current + default writer will be used instead; if there is no current writer, this + returns `tf.no_op`. For this legacy version only, also accepts a raw + resource tensor pointing to the underlying C++ writer resource. + name: Ignored legacy argument for a name for the operation. + + Returns: + The created `tf.Operation`. + """ + if writer is None or isinstance(writer, SummaryWriter): + # Forward to the TF2 implementation of flush() when possible. + return flush(writer, name) else: # Legacy fallback in case we were passed a raw resource tensor. with ops.device("cpu:0"): diff --git a/tensorflow/python/platform/BUILD b/tensorflow/python/platform/BUILD index 5e77b619185988..bc8231eceacda5 100644 --- a/tensorflow/python/platform/BUILD +++ b/tensorflow/python/platform/BUILD @@ -159,6 +159,7 @@ tf_py_test( tags = [ "no_windows", "nomac", + "no_oss", # TODO(b/258503209): Disable the test. 
], deps = [ ":client_testlib", diff --git a/tensorflow/python/summary/summary_test.py b/tensorflow/python/summary/summary_test.py index dd37a1e7a4da53..0329ba1c784f43 100644 --- a/tensorflow/python/summary/summary_test.py +++ b/tensorflow/python/summary/summary_test.py @@ -23,6 +23,7 @@ from tensorflow.core.framework import summary_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops from tensorflow.python.framework import test_util @@ -183,6 +184,11 @@ def testAudioSummaryWithFamily(self): 'family/outer/family/inner/audio/{}'.format(i) for i in range(3)) self.assertEqual(tags, expected) + def testAudioSummaryWithInvalidSampleRate(self): + with self.assertRaises(errors.InvalidArgumentError): + invalid_sample_rate = [22000.0, 22000.0] + self.evaluate(summary_lib.audio('', [[1.0]], invalid_sample_rate)) + @test_util.run_deprecated_v1 def testTextSummary(self): with self.cached_session(): diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index b41007071f54c9..8661c4a2339dd5 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -671,7 +671,7 @@ def load_inputs_from_input_arg_string(inputs_str, input_exprs_str, tensor_key_feed_dict = {} inputs = preprocess_inputs_arg_string(inputs_str) - input_exprs = preprocess_input_exprs_arg_string(input_exprs_str, safe=False) + input_exprs = preprocess_input_exprs_arg_string(input_exprs_str) input_examples = preprocess_input_examples_arg_string(input_examples_str) for input_tensor_key, (filename, variable_name) in inputs.items(): diff --git a/tensorflow/python/tools/saved_model_cli_test.py b/tensorflow/python/tools/saved_model_cli_test.py index 1517599a6f3099..5d42a0f6d324be 100644 --- a/tensorflow/python/tools/saved_model_cli_test.py +++ b/tensorflow/python/tools/saved_model_cli_test.py @@ -486,43 +486,6 @@ def testInputParserPickle(self): self.assertTrue(np.all(feed_dict['y'] == pkl1)) self.assertTrue(np.all(feed_dict['z'] == pkl2)) - def testInputParserPythonExpression(self): - x1 = np.ones([2, 10]) - x2 = np.array([[1], [2], [3]]) - x3 = np.mgrid[0:5, 0:5] - x4 = [[3], [4]] - input_expr_str = ('x1=np.ones([2,10]);x2=np.array([[1],[2],[3]]);' - 'x3=np.mgrid[0:5,0:5];x4=[[3],[4]]') - feed_dict = saved_model_cli.load_inputs_from_input_arg_string( - '', input_expr_str, '') - self.assertTrue(np.all(feed_dict['x1'] == x1)) - self.assertTrue(np.all(feed_dict['x2'] == x2)) - self.assertTrue(np.all(feed_dict['x3'] == x3)) - self.assertTrue(np.all(feed_dict['x4'] == x4)) - - def testInputParserBoth(self): - x0 = np.array([[1], [2]]) - input_path = os.path.join(test.get_temp_dir(), 'input.npz') - np.savez(input_path, a=x0) - x1 = np.ones([2, 10]) - input_str = 'x0=' + input_path + '[a]' - input_expr_str = 'x1=np.ones([2,10])' - feed_dict = saved_model_cli.load_inputs_from_input_arg_string( - input_str, input_expr_str, '') - self.assertTrue(np.all(feed_dict['x0'] == x0)) - self.assertTrue(np.all(feed_dict['x1'] == x1)) - - def testInputParserBothDuplicate(self): - x0 = np.array([[1], [2]]) - input_path = os.path.join(test.get_temp_dir(), 'input.npz') - np.savez(input_path, a=x0) - x1 = np.ones([2, 10]) - input_str = 'x0=' + input_path + '[a]' - input_expr_str = 'x0=np.ones([2,10])' - feed_dict = saved_model_cli.load_inputs_from_input_arg_string( - input_str, 
input_expr_str, '') - self.assertTrue(np.all(feed_dict['x0'] == x1)) - def testInputParserErrorNoName(self): x0 = np.array([[1], [2]]) x1 = np.array(range(5)) @@ -623,7 +586,7 @@ def testRunCommandInvalidInputKeyError(self): base_path = test.test_src_dir_path(SAVED_MODEL_PATH) args = self.parser.parse_args([ 'run', '--dir', base_path, '--tag_set', 'serve', '--signature_def', - 'regress_x2_to_y3', '--input_exprs', 'x2=np.ones((3,1))' + 'regress_x2_to_y3', '--input_exprs', 'x2=[1,2,3]' ]) with self.assertRaises(ValueError): saved_model_cli.run(args) @@ -633,7 +596,7 @@ def testRunCommandInvalidSignature(self): base_path = test.test_src_dir_path(SAVED_MODEL_PATH) args = self.parser.parse_args([ 'run', '--dir', base_path, '--tag_set', 'serve', '--signature_def', - 'INVALID_SIGNATURE', '--input_exprs', 'x2=np.ones((3,1))' + 'INVALID_SIGNATURE', '--input_exprs', 'x2=[1,2,3]' ]) with self.assertRaisesRegex(ValueError, 'Could not find signature "INVALID_SIGNATURE"'): diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 61490437ba4452..0a3afef8368866 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -56,7 +56,7 @@ def register_extension_info(**kwargs): # not contain rc or alpha, only numbers. # Also update tensorflow/core/public/version.h # and tensorflow/tools/pip_package/setup.py -VERSION = "2.8.0" +VERSION = "2.8.4" VERSION_MAJOR = VERSION.split(".")[0] two_gpu_tags = ["requires-gpu-nvidia:2", "notap", "manual", "no_pip"] diff --git a/tensorflow/tools/ci_build/release/common.sh b/tensorflow/tools/ci_build/release/common.sh index 6530cb0652ce5e..6d0053f926009f 100644 --- a/tensorflow/tools/ci_build/release/common.sh +++ b/tensorflow/tools/ci_build/release/common.sh @@ -277,6 +277,10 @@ function install_macos_pip_deps_no_venv { # First, upgrade pypi wheels ${PIP_CMD} install --user --upgrade setuptools pip wheel + # See https://github.com/pypa/setuptools/issues/3293 + # Must happen first, before anything else + ${PIP_CMD} install --user --upgrade 'importlib-metadata > 4' + # LINT.IfChange(mac_pip_installations) # Remove any historical keras package if they are installed. ${PIP_CMD} list diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 58de3ad9c40daf..9e1db54414bb3f 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -46,7 +46,7 @@ # result for pip. # Also update tensorflow/tensorflow.bzl and # tensorflow/core/public/version.h -_VERSION = '2.8.0' +_VERSION = '2.8.4' # We use the same setup.py for all tensorflow_* packages and for the nightly @@ -81,7 +81,14 @@ 'libclang >= 9.0.1', 'numpy >= 1.20', 'opt_einsum >= 2.3.2', - 'protobuf >= 3.9.2', + # TODO(b/182876485): Protobuf 3.20 results in linker errors on Windows + # Protobuf 4.0 is binary incompatible with what C++ TF uses. + # We need ~1 quarter to update properly. + # See also: https://github.com/tensorflow/tensorflow/issues/53234 + # See also: https://github.com/protocolbuffers/protobuf/issues/9954 + # See also: https://github.com/tensorflow/tensorflow/issues/56077 + # This is a temporary patch for now, to patch previous TF releases.
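The setup.py change here tightens the runtime dependency pins for the 2.8.4 patch release: protobuf is capped below 3.20 for the reasons listed in the comments above, and further down in the hunk the nightly tf-estimator pin is replaced by the released tensorflow-estimator >= 2.8, < 2.9 range. A small sketch of how an installed environment could be checked against those ranges; it assumes the third-party packaging library is available and is not part of the patch itself:

    from importlib.metadata import version  # Python 3.8+
    from packaging.specifiers import SpecifierSet
    from packaging.version import Version

    # Requirement ranges mirror the pins added in this setup.py hunk.
    pins = {
        "protobuf": SpecifierSet(">=3.9.2,<3.20"),
        "tensorflow-estimator": SpecifierSet(">=2.8,<2.9"),
    }
    for name, spec in pins.items():
        installed = Version(version(name))
        status = "OK" if installed in spec else f"violates {spec}"
        print(f"{name} {installed}: {status}")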
+ 'protobuf >= 3.9.2, < 3.20', 'setuptools', 'six >= 1.12.0', 'termcolor >= 1.1.0', @@ -92,7 +99,7 @@ # They are updated during the release process # When updating these, please also update the nightly versions below 'tensorboard >= 2.8, < 2.9', - 'tf-estimator-nightly == 2.8.0.dev2021122109', + 'tensorflow-estimator >= 2.8, < 2.9', 'keras >= 2.8.0rc0, < 2.9', 'tensorflow-io-gcs-filesystem >= 0.23.1', ] diff --git a/tensorflow/workspace2.bzl b/tensorflow/workspace2.bzl index 60deadb19623b1..0129ff7f4203e9 100644 --- a/tensorflow/workspace2.bzl +++ b/tensorflow/workspace2.bzl @@ -299,10 +299,10 @@ def _tf_repositories(): tf_http_archive( name = "org_sqlite", build_file = "//third_party:sqlite.BUILD", - sha256 = "999826fe4c871f18919fdb8ed7ec9dd8217180854dd1fe21eea96aed36186729", - strip_prefix = "sqlite-amalgamation-3360000", + sha256 = "9c99955b21d2374f3a385d67a1f64cbacb1d4130947473d25c77ad609c03b4cd", + strip_prefix = "sqlite-amalgamation-3390400", system_build_file = "//third_party/systemlibs:sqlite.BUILD", - urls = tf_mirror_urls("https://www.sqlite.org/2021/sqlite-amalgamation-3360000.zip"), + urls = tf_mirror_urls("https://www.sqlite.org/2022/sqlite-amalgamation-3390400.zip"), ) tf_http_archive( @@ -490,10 +490,10 @@ def _tf_repositories(): tf_http_archive( name = "curl", build_file = "//third_party:curl.BUILD", - sha256 = "370b11201349816287fb0ccc995e420277fbfcaf76206e309b3f60f0eda090c2", - strip_prefix = "curl-7.79.1", + sha256 = "3c6893d38d054d4e378267166858698899e9d87258e8ff1419d020c395384535", + strip_prefix = "curl-7.84.0", system_build_file = "//third_party/systemlibs:curl.BUILD", - urls = tf_mirror_urls("https://curl.haxx.se/download/curl-7.79.1.tar.gz"), + urls = tf_mirror_urls("https://curl.haxx.se/download/curl-7.84.0.tar.gz"), ) # WARNING: make sure ncteisen@ and vpai@ are cc-ed on any CL to change the below rule @@ -559,13 +559,14 @@ def _tf_repositories(): urls = tf_mirror_urls("https://github.com/google/boringssl/archive/80ca9f9f6ece29ab132cce4cf807a9465a18cfac.tar.gz"), ) + # Note: if you update this, you have to update libpng too. 
See cl/437813808 tf_http_archive( name = "zlib", build_file = "//third_party:zlib.BUILD", - sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1", - strip_prefix = "zlib-1.2.11", + sha256 = "91844808532e5ce316b3c010929493c0244f3d37593afd6de04f71821d5136d9", + strip_prefix = "zlib-1.2.12", system_build_file = "//third_party/systemlibs:zlib.BUILD", - urls = tf_mirror_urls("https://zlib.net/zlib-1.2.11.tar.gz"), + urls = tf_mirror_urls("https://zlib.net/zlib-1.2.12.tar.gz"), ) # LINT.IfChange diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD index 6a41034e0bd027..508de07e36b246 100644 --- a/third_party/curl.BUILD +++ b/third_party/curl.BUILD @@ -50,8 +50,6 @@ cc_library( "lib/config-os400.h", "lib/config-plan9.h", "lib/config-riscos.h", - "lib/config-tpf.h", - "lib/config-vxworks.h", "lib/config-win32.h", "lib/config-win32ce.h", "lib/conncache.c", @@ -123,12 +121,15 @@ cc_library( "lib/easyif.h", "lib/easyoptions.c", "lib/easyoptions.h", + "lib/easy_lock.h", "lib/escape.c", "lib/escape.h", "lib/file.c", "lib/file.h", "lib/fileinfo.c", "lib/fileinfo.h", + "lib/fopen.c", + "lib/fopen.h", "lib/formdata.c", "lib/formdata.h", "lib/ftp.c", @@ -140,12 +141,14 @@ cc_library( "lib/getinfo.h", "lib/gopher.c", "lib/gopher.h", + "lib/h2h3.c", + "lib/h2h3.h", "lib/hash.c", "lib/hash.h", + "lib/headers.c", + "lib/headers.h", "lib/hmac.c", "lib/hostasyn.c", - "lib/hostcheck.c", - "lib/hostcheck.h", "lib/hostip.c", "lib/hostip.h", "lib/hostip4.c", @@ -195,12 +198,8 @@ cc_library( "lib/multiif.h", "lib/netrc.c", "lib/netrc.h", - "lib/non-ascii.c", - "lib/non-ascii.h", "lib/nonblock.c", "lib/nonblock.h", - #"lib/nwlib.c", - #"lib/nwos.c", "lib/openldap.c", "lib/parsedate.c", "lib/parsedate.h", @@ -262,6 +261,8 @@ cc_library( "lib/telnet.h", "lib/tftp.c", "lib/tftp.h", + "lib/timediff.c", + "lib/timediff.h", "lib/timeval.c", "lib/timeval.h", "lib/transfer.c", @@ -278,8 +279,6 @@ cc_library( "lib/warnless.h", "lib/wildcard.c", "lib/wildcard.h", - "lib/x509asn1.c", - "lib/x509asn1.h", "lib/vauth/cleartext.c", "lib/vauth/cram.c", "lib/vauth/digest.c", @@ -294,6 +293,8 @@ cc_library( "lib/vauth/spnego_sspi.c", "lib/vauth/vauth.c", "lib/vauth/vauth.h", + "lib/vquic/msh3.c", + "lib/vquic/msh3.h", "lib/vquic/ngtcp2.c", "lib/vquic/ngtcp2.h", "lib/vquic/quiche.c", @@ -310,14 +311,14 @@ cc_library( "lib/vtls/gskit.h", "lib/vtls/gtls.c", "lib/vtls/gtls.h", + "lib/vtls/hostcheck.c", + "lib/vtls/hostcheck.h", "lib/vtls/keylog.c", "lib/vtls/keylog.h", "lib/vtls/mbedtls.c", "lib/vtls/mbedtls.h", "lib/vtls/mbedtls_threadlock.c", "lib/vtls/mbedtls_threadlock.h", - "lib/vtls/mesalink.c", - "lib/vtls/mesalink.h", "lib/vtls/nss.c", "lib/vtls/nssg.h", "lib/vtls/openssl.c", @@ -332,6 +333,8 @@ cc_library( "lib/vtls/vtls.h", "lib/vtls/wolfssl.c", "lib/vtls/wolfssl.h", + "lib/vtls/x509asn1.c", + "lib/vtls/x509asn1.h", ] + select({ "@org_tensorflow//tensorflow:macos": [ "lib/vtls/sectransp.c", @@ -347,6 +350,7 @@ cc_library( "include/curl/curl.h", "include/curl/curlver.h", "include/curl/easy.h", + "include/curl/header.h", "include/curl/mprintf.h", "include/curl/multi.h", "include/curl/options.h", @@ -447,8 +451,6 @@ cc_binary( "src/tool_cb_wrt.h", "src/tool_cfgable.c", "src/tool_cfgable.h", - "src/tool_convert.c", - "src/tool_convert.h", "src/tool_dirhie.c", "src/tool_dirhie.h", "src/tool_doswin.c", diff --git a/third_party/png.BUILD b/third_party/png.BUILD index 719d4c7c670fc6..145b0dff05e38a 100644 --- a/third_party/png.BUILD +++ b/third_party/png.BUILD @@ -61,7 +61,7 @@ genrule( name = 
"snappy_stubs_public_h", srcs = ["scripts/pnglibconf.h.prebuilt"], outs = ["pnglibconf.h"], - cmd = "sed -e 's/PNG_ZLIB_VERNUM 0/PNG_ZLIB_VERNUM 0x12b0/' $< >$@", + cmd = "sed -e 's/PNG_ZLIB_VERNUM 0/PNG_ZLIB_VERNUM 0x12c0/' $< >$@", ) config_setting(