diff --git a/.bazelrc b/.bazelrc index b6a72c1e5d357a..f95e43475375e2 100644 --- a/.bazelrc +++ b/.bazelrc @@ -195,6 +195,8 @@ build:ios_armv7 --config=ios build:ios_armv7 --cpu=ios_armv7 build:ios_arm64 --config=ios build:ios_arm64 --cpu=ios_arm64 +build:ios_sim_arm64 --config=ios +build:ios_sim_arm64 --cpu=ios_sim_arm64 build:ios_i386 --config=ios build:ios_i386 --cpu=ios_i386 build:ios_x86_64 --config=ios diff --git a/.github/workflows/cffconvert.yml b/.github/workflows/cffconvert.yml deleted file mode 100644 index a61d022d5e4485..00000000000000 --- a/.github/workflows/cffconvert.yml +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -name: cffconvert - -on: - push: - paths: - - CITATION.cff - -jobs: - validate: - name: "validate" - runs-on: ubuntu-latest - steps: - - name: Check out a copy of the repository - uses: actions/checkout@v2 - - - name: Check whether the citation metadata from CITATION.cff is valid - uses: citation-file-format/cffconvert-github-action@2.0.0 - with: - args: "--validate" diff --git a/.github/workflows/create_issue.js b/.github/workflows/create_issue.js deleted file mode 100644 index eca20953f8484a..00000000000000 --- a/.github/workflows/create_issue.js +++ /dev/null @@ -1,64 +0,0 @@ -/** - * @license - * Copyright 2021 Google LLC. All Rights Reserved. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ============================================================================= - */ - -/** Extracts PR from commit message and creates a GitHub Issue on Rollback of PR - Created issue is assigned to original PR owner and reviewer. 
- - @param {!object} - github enables querying for PR and also create issue using rest endpoint - context has the commit message details in the payload - @return {string} Returns the issue number and title -*/ -module.exports = async ({github, context}) => { - const rollback_commit = context.payload.head_commit.id; - const pr_match_groups = context.payload.head_commit.message.match(/\Rollback of PR #(\d+).*/) || []; - if (pr_match_groups.length != 2) { - console.log(`PR Number not found in ${context.payload.head_commit.message}`); - throw "Error extracting PR Number from commit message"; - } - const pr_number = parseInt(pr_match_groups[1]); - const owner = context.payload.repository.owner.name; - const repo = context.payload.repository.name; - console.log(`Original PR: ${pr_number} and Rollback Commit: ${rollback_commit}`); - // Get the Original PR Details - const pr_resp = await github.rest.pulls.get({ - owner, - repo, - pull_number: pr_number - }); - if (pr_resp.status != 200 || pr_resp.data.state != 'closed') { - console.log(`PR:{pr_number} is not found or closed. Not a valid condition to create an issue.`); - console.log(pr_resp); - throw `PR:{pr_number} needs to be valid and closed (merged)`; - } - const pr_title = pr_resp.data.title; - // Assign to PR owner and reviewers - const assignees = pr_resp.data.assignees.concat(pr_resp.data.requested_reviewers); - let assignee_logins = assignees.map(x => x.login); - assignee_logins.push(pr_resp.data.user.login); - console.log(assignee_logins); - // Create an new GH Issue and reference the Original PR - const resp = await github.rest.issues.create({ - owner, - repo, - assignees: assignee_logins, - title: `Issue created for Rollback of PR #${pr_number}: ${pr_title}`, - body: `Merged PR #${pr_number} is rolled back in ${rollback_commit}. - Please follow up with the reviewer and close this issue once its resolved.` - }); - return `Issue created: ${resp.data.number} with Title: ${resp.data.title}`; -}; diff --git a/.github/workflows/issue-on-pr-rollback.yml b/.github/workflows/issue-on-pr-rollback.yml deleted file mode 100644 index ce0182bedc2937..00000000000000 --- a/.github/workflows/issue-on-pr-rollback.yml +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -name: Creates a GitHub Issue when a PR Rolled back via Commit to Master -on: - push: - branches: - - master - -jobs: - create-issue-on-pr-rollback: - runs-on: ubuntu-latest - if: | - github.repository == 'tensorflow/tensorflow' && - startsWith(github.event.head_commit.message, 'Rollback of PR #') - steps: - - name: Checkout repo - uses: actions/checkout@v2 - - name: Create a new Github Issue - uses: actions/github-script@v5 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - script: | - const script = require('./.github/workflows/create_issue.js') - console.log(await script({github, context})) diff --git a/.github/workflows/pylint-presubmit.yml b/.github/workflows/pylint-presubmit.yml deleted file mode 100644 index b469f047f4d423..00000000000000 --- a/.github/workflows/pylint-presubmit.yml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright 2021 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -name: PyLint -on: - pull_request: - paths: - - '**.py' - -jobs: - build: - name: PyLint - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v2 - - name: Get file changes - id: get_file_changes - uses: trilom/file-changes-action@v1.2.4 - with: - output: ' ' - - name: Report list of changed files - run: | - echo Changed files: ${{ steps.get_file_changes.outputs.files }} - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: "3.9" - - name: Install Python dependencies - run: | - python -m pip install --upgrade pip - pip install pylint numpy wheel - pip install keras_preprocessing --no-deps - - name: Run PyLint on changed files - run: | - echo "${{ steps.get_file_changes.outputs.files}}" | tr " " "\n" | grep ".py$" | xargs pylint --rcfile=tensorflow/tools/ci_build/pylintrc diff --git a/.github/workflows/scorecards-analysis.yml b/.github/workflows/scorecards-analysis.yml deleted file mode 100644 index 3694dd0171f97a..00000000000000 --- a/.github/workflows/scorecards-analysis.yml +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -name: Scorecards supply-chain security -on: - # Only the default branch is supported. 
- branch_protection_rule: - schedule: - - cron: '44 15 * * 5' - push: - branches: [ master ] - -# Declare default permissions as read only. -permissions: read-all - -jobs: - analysis: - if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks - name: Scorecards analysis - runs-on: ubuntu-latest - permissions: - # Needed to upload the results to code-scanning dashboard. - security-events: write - actions: read - contents: read - - steps: - - name: "Checkout code" - uses: actions/checkout@ec3a7ce113134d7a93b817d10a8272cb61118579 # v2.4.0 - with: - persist-credentials: false - - - name: "Run analysis" - uses: ossf/scorecard-action@0fe1afdc40f536c78e3dc69147b91b3ecec2cc8a # v1.0.0 - with: - results_file: results.sarif - results_format: sarif - # Read-only PAT token. To create it, - # follow the steps in https://github.com/ossf/scorecard-action#pat-token-creation. - repo_token: ${{ secrets.SCORECARD_READ_TOKEN }} - # Publish the results to enable scorecard badges. For more details, see - # https://github.com/ossf/scorecard-action#publishing-results. - # For private repositories, `publish_results` will automatically be set to `false`, - # regardless of the value entered here. - publish_results: true - - # Upload the results as artifacts (optional). - - name: "Upload artifact" - uses: actions/upload-artifact@82c141cc518b40d92cc801eee768e7aafc9c2fa2 # v2.3.1 - with: - name: SARIF file - path: results.sarif - retention-days: 5 - - # Upload the results to GitHub's code scanning dashboard. - - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@5f532563584d71fdef14ee64d17bafb34f751ce5 # v1.0.26 - with: - sarif_file: results.sarif diff --git a/.github/workflows/update-nightly.yml b/.github/workflows/update-nightly.yml deleted file mode 100644 index 0265ffbebe2ec0..00000000000000 --- a/.github/workflows/update-nightly.yml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -on: - workflow_dispatch: # Allow manual triggers - schedule: - - cron: 0 4 * * * # 4am UTC is 9pm PDT and 8pm PST -name: Set nightly branch to master HEAD -jobs: - master-to-nightly: - if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks - runs-on: ubuntu-latest - steps: - - uses: zofrex/mirror-branch@v1 - name: Set nightly branch to master HEAD - with: - target-branch: 'nightly' diff --git a/.github/workflows/update-rbe.yml b/.github/workflows/update-rbe.yml deleted file mode 100644 index 8302612623b81a..00000000000000 --- a/.github/workflows/update-rbe.yml +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright 2022 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - - -# This Workflow updates tensorflow/tools/toolchains/remote_config/configs.bzl -# to reference the most recent versions of the SIG Build Docker images. -name: Update RBE Configs -on: - workflow_dispatch: - -jobs: - rbe: - name: Update RBE Configs - runs-on: ubuntu-latest - if: github.repository == 'tensorflow/tensorflow' # Don't do this in forks - steps: - - name: Checkout code - uses: actions/checkout@v2 - - name: Update the RBE Configs - run: | - function map() { - # The "digest" that allows us to pull an image is not the digest as - # returned by the API, but a sha256sum of the entire chunk of image - # metadata. gcr.io helpfully includes it in the header of the response - # as docker-content-digest: sha256:[digest]. Note we use egrep to - # match exactly sha256: because curl may include a ^M symbol at - # the end of the line. - # See https://cloud.google.com/architecture/using-container-images#exploring_image_manifests_digests_and_tags - digest=$(curl -s --head "https://gcr.io/v2/tensorflow-sigs/build/manifests/$2" | egrep -o "sha256:[[:alnum:]]*") - # Find the line matching the regex "sigbuild-r2.9" (with quotes) and - # replace just the digest portion in it - sed -i"" "/\"$1\"/ s/sha256:[[:alnum:]]*/$digest/g" tensorflow/tools/toolchains/remote_config/configs.bzl - } - # See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/toolchains/remote_config/configs.bzl - # This is a mapping of name_container_map keys under sigbuild_tf_configs - # to tag names on gcr.io/tensorflow-sigs/build. - map sigbuild-r2.9 latest-python3.9 - map sigbuild-r2.9-python3.7 latest-python3.7 - map sigbuild-r2.9-python3.8 latest-python3.8 - map sigbuild-r2.9-python3.9 latest-python3.9 - map sigbuild-r2.9-python3.10 latest-python3.10 - - name: Create Pull Request with changes - uses: peter-evans/create-pull-request@v3 - with: - title: Update the RBE images to the latest container versions - committer: TensorFlow Release Automation - token: ${{ secrets.JENKINS_TOKEN }} - reviewers: angerson,mihaimaruseac - body: | - This PR was created by a GitHub Actions workflow to update all the SIG Build-based RBE containers to the most recent containers. 
See: - - - https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/toolchains/remote_config/configs.bzl - - https://github.com/tensorflow/tensorflow/blob/master/.github/workflows/update-rbe.yml diff --git a/RELEASE.md b/RELEASE.md index b2cd63195e254e..a085ad7a99bdc2 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,103 @@ +# Release 2.9.3 + +This release introduces several vulnerability fixes: + +* Fixes an overflow in `tf.keras.losses.poisson` ([CVE-2022-41887](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41887)) +* Fixes a heap OOB failure in `ThreadUnsafeUnigramCandidateSampler` caused by missing validation ([CVE-2022-41880](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41880)) +* Fixes a segfault in `ndarray_tensor_bridge` ([CVE-2022-41884](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41884)) +* Fixes an overflow in `FusedResizeAndPadConv2D` ([CVE-2022-41885](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41885)) +* Fixes a overflow in `ImageProjectiveTransformV2` ([CVE-2022-41886](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41886)) +* Fixes an FPE in `tf.image.generate_bounding_box_proposals` on GPU ([CVE-2022-41888](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41888)) +* Fixes a segfault in `pywrap_tfe_src` caused by invalid attributes ([CVE-2022-41889](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41889)) +* Fixes a `CHECK` fail in `BCast` ([CVE-2022-41890](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41890)) +* Fixes a segfault in `TensorListConcat` ([CVE-2022-41891](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41891)) +* Fixes a `CHECK_EQ` fail in `TensorListResize` ([CVE-2022-41893](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41893)) +* Fixes an overflow in `CONV_3D_TRANSPOSE` on TFLite ([CVE-2022-41894](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41894)) +* Fixes a heap OOB in `MirrorPadGrad` ([CVE-2022-41895](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41895)) +* Fixes a crash in `Mfcc` ([CVE-2022-41896](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41896)) +* Fixes a heap OOB in `FractionalMaxPoolGrad` ([CVE-2022-41897](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41897)) +* Fixes a `CHECK` fail in `SparseFillEmptyRowsGrad` ([CVE-2022-41898](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41898)) +* Fixes a `CHECK` fail in `SdcaOptimizer` ([CVE-2022-41899](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41899)) +* Fixes a heap OOB in `FractionalAvgPool` and `FractionalMaxPool`([CVE-2022-41900](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41900)) +* Fixes a `CHECK_EQ` in `SparseMatrixNNZ` ([CVE-2022-41901](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41901)) +* Fixes an OOB write in grappler ([CVE-2022-41902](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41902)) +* Fixes a overflow in `ResizeNearestNeighborGrad` ([CVE-2022-41907](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41907)) +* Fixes a `CHECK` fail in `PyFunc` ([CVE-2022-41908](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41908)) +* Fixes a segfault in `CompositeTensorVariantToComponents` ([CVE-2022-41909](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41909)) +* Fixes a invalid char to bool conversion in printing a tensor ([CVE-2022-41911](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41911)) +* Fixes a heap overflow in `QuantizeAndDequantizeV2` 
([CVE-2022-41910](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-41910)) +* Fixes a `CHECK` failure in `SobolSample` via missing validation ([CVE-2022-35935](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35935)) +* Fixes a `CHECK` fail in `TensorListScatter` and `TensorListScatterV2` in eager mode ([CVE-2022-35935](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35935)) + + +# Release 2.9.2 + +This releases introduces several vulnerability fixes: + +* Fixes a `CHECK` failure in tf.reshape caused by overflows ([CVE-2022-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35934)) +* Fixes a `CHECK` failure in `SobolSample` caused by missing validation ([CVE-2022-35935](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35935)) +* Fixes an OOB read in `Gather_nd` op in TF Lite ([CVE-2022-35937](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35937)) +* Fixes a `CHECK` failure in `TensorListReserve` caused by missing validation ([CVE-2022-35960](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35960)) +* Fixes an OOB write in `Scatter_nd` op in TF Lite ([CVE-2022-35939](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35939)) +* Fixes an integer overflow in `RaggedRangeOp` ([CVE-2022-35940](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35940)) +* Fixes a `CHECK` failure in `AvgPoolOp` ([CVE-2022-35941](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35941)) +* Fixes a `CHECK` failures in `UnbatchGradOp` ([CVE-2022-35952](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35952)) +* Fixes a segfault TFLite converter on per-channel quantized transposed convolutions ([CVE-2022-36027](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36027)) +* Fixes a `CHECK` failures in `AvgPool3DGrad` ([CVE-2022-35959](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35959)) +* Fixes a `CHECK` failures in `FractionalAvgPoolGrad` ([CVE-2022-35963](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35963)) +* Fixes a segfault in `BlockLSTMGradV2` ([CVE-2022-35964](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35964)) +* Fixes a segfault in `LowerBound` and `UpperBound` ([CVE-2022-35965](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35965)) +* Fixes a segfault in `QuantizedAvgPool` ([CVE-2022-35966](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35966)) +* Fixes a segfault in `QuantizedAdd` ([CVE-2022-35967](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35967)) +* Fixes a `CHECK` fail in `AvgPoolGrad` ([CVE-2022-35968](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35968)) +* Fixes a `CHECK` fail in `Conv2DBackpropInput` ([CVE-2022-35969](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35969)) +* Fixes a segfault in `QuantizedInstanceNorm` ([CVE-2022-35970](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35970)) +* Fixes a `CHECK` fail in `FakeQuantWithMinMaxVars` ([CVE-2022-35971](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35971)) +* Fixes a segfault in `Requantize` ([CVE-2022-36017](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36017)) +* Fixes a segfault in `QuantizedBiasAdd` ([CVE-2022-35972](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35972)) +* Fixes a `CHECK` fail in `FakeQuantWithMinMaxVarsPerChannel` ([CVE-2022-36019](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36019)) +* Fixes a segfault in `QuantizedMatMul` ([CVE-2022-35973](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35973)) +* 
Fixes a segfault in `QuantizeDownAndShrinkRange` ([CVE-2022-35974](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35974)) +* Fixes segfaults in `QuantizedRelu` and `QuantizedRelu6` ([CVE-2022-35979](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35979)) +* Fixes a `CHECK` fail in `FractionalMaxPoolGrad` ([CVE-2022-35981](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35981)) +* Fixes a `CHECK` fail in `RaggedTensorToVariant` ([CVE-2022-36018](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36018)) +* Fixes a `CHECK` fail in `QuantizeAndDequantizeV3` ([CVE-2022-36026](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36026)) +* Fixes a segfault in `SparseBincount` ([CVE-2022-35982](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35982)) +* Fixes a `CHECK` fail in `Save` and `SaveSlices` ([CVE-2022-35983](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35983)) +* Fixes a `CHECK` fail in `ParameterizedTruncatedNormal` ([CVE-2022-35984](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35984)) +* Fixes a `CHECK` fail in `LRNGrad` ([CVE-2022-35985](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35985)) +* Fixes a segfault in `RaggedBincount` ([CVE-2022-35986](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35986)) +* Fixes a `CHECK` fail in `DenseBincount` ([CVE-2022-35987](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35987)) +* Fixes a `CHECK` fail in `tf.linalg.matrix_rank` ([CVE-2022-35988](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35988)) +* Fixes a `CHECK` fail in `MaxPool` ([CVE-2022-35989](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35989)) +* Fixes a `CHECK` fail in `Conv2DBackpropInput` ([CVE-2022-35999](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35999)) +* Fixes a `CHECK` fail in `EmptyTensorList` ([CVE-2022-35998](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35998)) +* Fixes a `CHECK` fail in `tf.sparse.cross` ([CVE-2022-35997](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35997)) +* Fixes a floating point exception in `Conv2D` ([CVE-2022-35996](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35996)) +* Fixes a `CHECK` fail in `AudioSummaryV2` ([CVE-2022-35995](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35995)) +* Fixes a `CHECK` fail in `CollectiveGather` ([CVE-2022-35994](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35994)) +* Fixes a `CHECK` fail in `SetSize` ([CVE-2022-35993](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35993)) +* Fixes a `CHECK` fail in `TensorListFromTensor` ([CVE-2022-35992](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35992)) +* Fixes a `CHECK` fail in `TensorListScatter` and `TensorListScatterV2` ([CVE-2022-35991](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35991)) +* Fixes a `CHECK` fail in `FakeQuantWithMinMaxVarsPerChannelGradient` ([CVE-2022-35990](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35990)) +* Fixes a `CHECK` fail in `FakeQuantWithMinMaxVarsGradient` ([CVE-2022-36005](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36005)) +* Fixes a `CHECK` fail in `tf.random.gamma` ([CVE-2022-36004](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36004)) +* Fixes a `CHECK` fail in `RandomPoissonV2` ([CVE-2022-36003](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36003)) +* Fixes a `CHECK` fail in `Unbatch` ([CVE-2022-36002](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36002)) +* Fixes a `CHECK` fail 
in `DrawBoundingBoxes` ([CVE-2022-36001](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36001)) +* Fixes a `CHECK` fail in `Eig` ([CVE-2022-36000](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36000)) +* Fixes a null dereference on MLIR on empty function attributes ([CVE-2022-36011](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36011)) +* Fixes an assertion failure on MLIR empty edge names ([CVE-2022-36012](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36012)) +* Fixes a null-dereference in `mlir::tfg::GraphDefImporter::ConvertNodeDef` ([CVE-2022-36013](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36013)) +* Fixes a null-dereference in `mlir::tfg::TFOp::nameAttr` ([CVE-2022-36014](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36014)) +* Fixes an integer overflow in math ops ([CVE-2022-36015](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36015)) +* Fixes a `CHECK`-fail in `tensorflow::full_type::SubstituteFromAttrs` ([CVE-2022-36016](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-36016)) +* Fixes an OOB read in `Gather_nd` op in TF Lite Micro ([CVE-2022-35938](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-35938)) + +# Release 2.9.1 + +Add an upper bound for `protobuf` in `setup.py` since `protobuf` after version 3.20 is currently incompatible with TensorFlow. See https://github.com/tensorflow/tensorflow/issues/53234, https://github.com/protocolbuffers/protobuf/issues/9954 and https://github.com/tensorflow/tensorflow/issues/56077. + # Release 2.9.0 # Breaking Changes diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 79d4702edf2033..5868164f159115 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -387,6 +387,7 @@ cc_library( "//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client:xla_computation", "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/core/util:overflow", "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index 5dee6dda09b7dc..1c70d1af88fb5d 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -30,6 +30,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/core/common_runtime/dma_helper.h" #include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/util/overflow.h" namespace tensorflow { @@ -443,6 +444,16 @@ Status XlaOpKernelContext::ConstantInputAsShape(int index, TensorShape* shape, TF_RETURN_IF_ERROR(ConstantInput(index, &literal, mode)); std::vector dims; TF_RETURN_IF_ERROR(LiteralToInt64Vector(literal, &dims)); + + int64_t num_elements = 1; + for (auto i = dims.begin(); i != dims.end(); ++i) { + num_elements = MultiplyWithoutOverflow(num_elements, *i); + if (num_elements < 0) + return errors::InvalidArgument( + "The total elements specified by orig_input_shape is too large.", + "Encountered overflow after multiplying", *i, + ", result: ", num_elements); + } *shape = TensorShape(dims); return Status::OK(); } diff --git a/tensorflow/core/framework/full_type_util.cc b/tensorflow/core/framework/full_type_util.cc index 0772f3fe42c7c2..eca7b7b8254893 100644 --- a/tensorflow/core/framework/full_type_util.cc +++ b/tensorflow/core/framework/full_type_util.cc @@ -164,7 +164,11 @@ Status SubstituteVar(AttrMap& attrs, FullTypeDef& t) { } Status SubstituteForEach(AttrMap& attrs, FullTypeDef& t) { - DCHECK_EQ(t.args_size(), 3); + if (t.args_size() != 3) { + return Status(error::INVALID_ARGUMENT, + absl::StrCat("illegal FOR_EACH type, expected 3 args, got ", + t.args_size())); + } const auto& cont = t.args(0); const auto& tmpl = t.args(1); diff --git a/tensorflow/core/framework/full_type_util_test.cc b/tensorflow/core/framework/full_type_util_test.cc index 0324e64f96b0f9..6037879d069ff9 100644 --- a/tensorflow/core/framework/full_type_util_test.cc +++ b/tensorflow/core/framework/full_type_util_test.cc @@ -510,6 +510,19 @@ TEST(SpecializeType, ForEachOverridesTargetOfNestedForEach) { EXPECT_EQ(t_actual.args(1).args(0).args(0).args_size(), 0); } +TEST(SpecializeType, ForEachRejectsMalformedInput) { + OpDef op; + FullTypeDef* t = op.add_output_arg()->mutable_experimental_full_type(); + t->set_type_id(TFT_FOR_EACH); + t->add_args()->set_type_id(TFT_PRODUCT); + + NodeDef ndef; + AttrSlice attrs(ndef); + + FullTypeDef ft; + EXPECT_FALSE(SpecializeType(attrs, op, ft).ok()); +} + TEST(SpecializeType, RemovesLegacyVariant) { OpDef op; FullTypeDef* t = op.add_output_arg()->mutable_experimental_full_type(); diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index 216df1799f288a..ca3930e85ca90a 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -29,6 +29,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" +#include #include #include "absl/strings/escaping.h" @@ -1183,12 +1184,10 @@ void PrintOneDimV2(int dim_index, const gtl::InlinedVector& shape, } template -string SummarizeArray(int64_t limit, int64_t num_elts, - const TensorShape& tensor_shape, const char* data, - const bool print_v2) { +string SummarizeArrayInternal(int64_t limit, int64_t num_elts, + const TensorShape& tensor_shape, const T* array, + const bool print_v2) { string ret; - const T* array = reinterpret_cast(data); - const gtl::InlinedVector shape = tensor_shape.dim_sizes(); if (shape.empty()) { for (int64_t i = 0; i < limit; ++i) { @@ -1211,6 +1210,29 @@ string SummarizeArray(int64_t limit, int64_t num_elts, return ret; } + +template +string SummarizeArray(int64_t limit, int64_t num_elts, + const TensorShape& tensor_shape, const char* data, + const bool print_v2) { + const T* array = reinterpret_cast(data); + return SummarizeArrayInternal(limit, num_elts, tensor_shape, array, + print_v2); +} + +template <> +string SummarizeArray(int64_t limit, int64_t num_elts, + const TensorShape& tensor_shape, const char* data, + const bool print_v2) { + // We first convert all chars to be 0/1 to not get InvalidEnumValue sanitizer + // error + auto mutable_data = std::unique_ptr(new char[num_elts]); + for (int64_t i = 0; i < num_elts; ++i) + mutable_data.get()[i] = data[i] ? 1 : 0; + bool* array = reinterpret_cast(mutable_data.get()); + return SummarizeArrayInternal(limit, num_elts, tensor_shape, array, + print_v2); +} } // namespace string Tensor::SummarizeValue(int64_t max_entries, bool print_v2) const { diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc index 4b647284f2955f..2f9f53fb44cd50 100644 --- a/tensorflow/core/grappler/utils/functions.cc +++ b/tensorflow/core/grappler/utils/functions.cc @@ -291,6 +291,11 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func, std::vector arg_attr(inputs.size(), nullptr); for (const auto& attr : func.arg_attr()) { + if (attr.first >= inputs.size()) { + return errors::InvalidArgument("Invalid attribute index, got ", + attr.first, " but expected less than ", + inputs.size()); + } arg_attr.at(attr.first) = &attr.second; } diff --git a/tensorflow/core/ir/importexport/functiondef_import.cc b/tensorflow/core/ir/importexport/functiondef_import.cc index 842f27bcc9c61e..721180c53594c8 100644 --- a/tensorflow/core/ir/importexport/functiondef_import.cc +++ b/tensorflow/core/ir/importexport/functiondef_import.cc @@ -38,6 +38,7 @@ limitations under the License. #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/status.h" #include "tensorflow/core/protobuf/graph_debug_info.pb.h" +#include "tensorflow/core/platform/statusor.h" using tensorflow::AttrValue; using tensorflow::FunctionDef; @@ -45,6 +46,7 @@ using tensorflow::NodeDef; using tensorflow::OpDef; using tensorflow::OpDef_AttrDef; using tensorflow::Status; +using tensorflow::StatusOr; using tensorflow::errors::InvalidArgument; using tensorflow::protobuf::RepeatedPtrField; @@ -171,9 +173,12 @@ Status ImportNodes(ValueMapManager value_manager, if (node.op().empty()) return InvalidArgument("empty op type"); OperationState state(unknown_loc, absl::StrCat("tfg.", node.op())); // Fetch the inputs, creating placeholder if an input hasn't been visited. 
- for (const std::string& input : node.input()) + for (const std::string& input : node.input()) { + if (input.empty()) + return InvalidArgument("Node '", node.name(), "' has an empty input"); state.operands.push_back( value_manager.GetValueOrCreatePlaceholder(input)); + } // Retrieve the entry in the nodes_map for this node and infer the result // count from what was inferred during the first traversal above. state.types.push_back(placeholder_ty); @@ -337,6 +342,8 @@ Status ImportGenericFunction( // Import the function attributes with a `tf.` prefix to match the current // infrastructure expectations. for (const auto& namedAttr : func.attr()) { + if (namedAttr.first.empty()) + return InvalidArgument("Invalid function attribute name"); const std::string& name = "tf." + namedAttr.first; const AttrValue& tf_attr = namedAttr.second; TF_ASSIGN_OR_RETURN(Attribute attr, @@ -463,21 +470,31 @@ Status ImportGenericFunction( Value()); for (const auto& ret_val : func.ret()) { auto position = output_name_to_position.find(ret_val.first); - if (position == output_name_to_position.end()) + if (position == output_name_to_position.end()) { return InvalidArgument( "Can't import function, returned value references unknown output " "argument ", ret_val.first); + } + if (ret_val.second.empty()) { + return InvalidArgument("Function '", func.signature().name(), + "' has empty result name"); + } ret_vals[position->second] = value_manager.GetValueOrCreatePlaceholder(ret_val.second); } for (const auto& ret_val : func.control_ret()) { auto position = control_output_to_position.find(ret_val.first); - if (position == control_output_to_position.end()) + if (position == control_output_to_position.end()) { return InvalidArgument( "Can't import function, returned value references unknown output " "argument ", ret_val.first); + } + if (ret_val.second.empty()) { + return InvalidArgument("Function '", func.signature().name(), + "' has empty control result name"); + } Value result = value_manager.GetValueOrCreatePlaceholder( (Twine("^") + ret_val.second).str()); if (!result.getType().isa()) diff --git a/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_control_result.pbtxt b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_control_result.pbtxt new file mode 100644 index 00000000000000..b7d82f87842dc7 --- /dev/null +++ b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_control_result.pbtxt @@ -0,0 +1,26 @@ +# RUN: not tfg-translate -graphdef-to-mlir %s 2>&1 | FileCheck %s + +# CHECK: Function 'foo' has empty control result name + +library { + function { + signature { + name: "foo" + control_output: "output" + } + node_def { + name: "y" + op: "NoOp" + attr { + key: "T" + value { + placeholder: "T" + } + } + } + control_ret { + key: "output" + value: "" + } + } +} diff --git a/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_input.pbtxt b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_input.pbtxt new file mode 100644 index 00000000000000..5b1c3cff4f85ab --- /dev/null +++ b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_input.pbtxt @@ -0,0 +1,22 @@ +# RUN: not tfg-translate -graphdef-to-mlir %s 2>&1 | FileCheck %s + +# CHECK: Node 'y' has an empty input + +library { + function { + signature { + name: "foo" + } + node_def { + name: "y" + input: "" + op: "Identity" + attr { + key: "T" + value { + 
placeholder: "T" + } + } + } + } +} diff --git a/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_result.pbtxt b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_result.pbtxt new file mode 100644 index 00000000000000..f4fc5263ebf790 --- /dev/null +++ b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_func_with_empty_result.pbtxt @@ -0,0 +1,29 @@ +# RUN: not tfg-translate -graphdef-to-mlir %s 2>&1 | FileCheck %s + +# CHECK: Function 'foo' has empty result name + +library { + function { + signature { + name: "foo" + output_arg { + name: "output" + type: DT_INT32 + } + } + node_def { + name: "y" + op: "NoOp" + attr { + key: "T" + value { + placeholder: "T" + } + } + } + ret { + key: "output" + value: "" + } + } +} diff --git a/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_function_attr_name.pbtxt b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_function_attr_name.pbtxt new file mode 100644 index 00000000000000..7a0f18f6732027 --- /dev/null +++ b/tensorflow/core/ir/importexport/tests/graphdef_to_mlir/invalid_generic_function_attr_name.pbtxt @@ -0,0 +1,52 @@ +# RUN: not tfg-translate -graphdef-to-mlir %s 2>&1 | FileCheck %s + +# CHECK: Invalid function attribute name + +library { + function { + signature { + name: "foo" + input_arg { + name: "a" + } + output_arg { + name: "d" + } + } + node_def { + op: "Const" + attr { + key: "_b" + value { + placeholder: "T" + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + } + } + } + } + ret { + key: "d" + value: "a" + } + attr { + key: "" + value { + s: "a" + } + } + } +} diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 0f8482400e9f12..7b53066a03bf23 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -443,6 +443,7 @@ tf_cc_test( "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core/platform:status_matchers", "@com_google_absl//absl/base:core_headers", ], ) @@ -4355,6 +4356,7 @@ tf_kernel_library( deps = [ ":fill_functor", ":gpu_prim_hdrs", + ":sparse_utils", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", @@ -4903,6 +4905,7 @@ cc_library( SPARSE_DEPS = [ "//tensorflow/core:framework", "//tensorflow/core:lib", + ":sparse_utils", ] tf_kernel_library( @@ -6373,6 +6376,7 @@ filegroup( "sparse_reorder_op.h", "sparse_slice_op.h", "sparse_tensor_dense_matmul_op.h", + "sparse_utils.h", "string_util.h", "string_to_hash_bucket_op.h", "string_to_hash_bucket_fast_op.h", @@ -6610,6 +6614,7 @@ filegroup( "random_op_cpu.h", "random_ops_util.h", "random_poisson_op.cc", + "sparse_utils.cc", "random_shuffle_op.cc", "reduce_join_op.cc", "reduction_ops_all.cc", diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc index 0429d50cdec23a..bc536d187512af 100644 --- a/tensorflow/core/kernels/avgpooling_op.cc +++ b/tensorflow/core/kernels/avgpooling_op.cc @@ -35,6 +35,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/overflow.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" @@ -77,10 +78,10 @@ class AvgPoolingOp : public UnaryOp { OP_REQUIRES(context, ksize_[0] == 1 && stride_[0] == 1, errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); - for (int i = 0; i < ksize_.size(); ++i) { - OP_REQUIRES(context, ksize_[i] != 0, - errors::InvalidArgument("ksize cannot be zero")); + OP_REQUIRES(context, ksize_[i] > 0, + errors::InvalidArgument( + "ksize must be a postive int32 value, got:", ksize_[i])); } } @@ -142,6 +143,11 @@ class AvgPoolingOp : public UnaryOp { OP_REQUIRES(context, ksize_.size() == 4, errors::InvalidArgument("Sliding window ksize field must " "specify 4 dimensions")); + for (int i = 0; i < ksize_.size(); ++i) { + OP_REQUIRES(context, ksize_[i] > 0, + errors::InvalidArgument( + "ksize must be a postive int32 value, got:", ksize_[i])); + } OP_REQUIRES_OK(context, context->GetAttr("strides", &stride_)); OP_REQUIRES(context, stride_.size() == 4, errors::InvalidArgument("Sliding window stride field must " @@ -298,7 +304,7 @@ class AvgPoolingGradOp : public OpKernel { TensorShape output_shape; auto shape_vec = tensor_in_shape.vec(); for (int64_t i = 0; i < tensor_in_shape.NumElements(); ++i) { - output_shape.AddDim(shape_vec(i)); + OP_REQUIRES_OK(context, output_shape.AddDimWithStatus(shape_vec(i))); } const int64_t in_rows = output_shape.dim_size(1); const int64_t in_cols = output_shape.dim_size(2); @@ -457,7 +463,7 @@ class AvgPoolingGradOp : public OpKernel { TensorShape output_shape; auto shape_vec = tensor_in_shape.vec(); for (int64_t i = 0; i < tensor_in_shape.NumElements(); ++i) { - output_shape.AddDim(shape_vec(i)); + OP_REQUIRES_OK(context, output_shape.AddDimWithStatus(shape_vec(i))); } if (output_shape.num_elements() == 0) { @@ -543,7 +549,7 @@ class AvgPoolingGradOpCustomGPUKernel : public OpKernel { TensorShape output_shape; auto shape_vec = tensor_in_shape.vec(); for (int64_t i = 0; i < tensor_in_shape.NumElements(); ++i) { - output_shape.AddDim(shape_vec(i)); + OP_REQUIRES_OK(context, output_shape.AddDimWithStatus(shape_vec(i))); } if (output_shape.num_elements() == 0) { Tensor* output = nullptr; diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc index 0bb0a43b7d0e9f..dce6c221a1e61a 100644 --- a/tensorflow/core/kernels/batch_kernels.cc +++ b/tensorflow/core/kernels/batch_kernels.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_util.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h" @@ -654,6 +655,12 @@ class UnbatchResource : public ResourceBase { batch_index_t.shape().dim_size(1), "."); } + if (!TensorShapeUtils::IsScalar(context->input(2).shape())) { + return errors::InvalidArgument( + "Input id should be scalar; " + "Got: ", + context->input(2).DebugString(), "."); + } const int64_t batch_key = context->input(2).scalar()(); const bool nonempty_input = batch_index_t.dim_size(0) > 0; @@ -885,8 +892,13 @@ class UnbatchGradResource : public ResourceBase { const Tensor& data_t = context->input(0); const Tensor& batch_index_t = context->input(1); const Tensor& grad_t = context->input(2); + const Tensor& batch_key_t = context->input(3); mutex_lock ml(mu_); + if (batch_key_t.NumElements() != 1) { + return errors::InvalidArgument("Expected `id` to be scalar. Received ", + batch_key_t.DebugString()); + } const int64_t batch_key = context->input(3).scalar()(); // Mark our tensor as available. @@ -902,6 +914,11 @@ class UnbatchGradResource : public ResourceBase { "batch_index is empty while the tensor isn't."); } std::unordered_set missing_tensors; + if (batch_index_t.NumElements() != batch_index_t.dim_size(0) * 3) { + return errors::InvalidArgument( + "batch_index should contain ", batch_index_t.dim_size(0) * 3, + " elements. Received ", batch_index_t.NumElements()); + } const auto batch_index = batch_index_t.shaped({batch_index_t.dim_size(0), 3}); for (int i = 0; i < batch_index_t.dim_size(0); ++i) { diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc index aad11a45a09b6c..c8fb81f9591546 100644 --- a/tensorflow/core/kernels/bincount_op.cc +++ b/tensorflow/core/kernels/bincount_op.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/bincount_op.h" #include "tensorflow/core/kernels/fill_functor.h" +#include "tensorflow/core/kernels/sparse_utils.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/determinism.h" @@ -279,6 +280,14 @@ class DenseBincountOp : public OpKernel { OP_REQUIRES(ctx, size_t.dims() == 0, errors::InvalidArgument("Shape must be rank 0 but is rank ", size_t.dims())); + OP_REQUIRES(ctx, + weights.shape() == data.shape() || weights.NumElements() == 0, + errors::InvalidArgument( + "`weights` must be the same shape as `arr` or a length-0 " + "`Tensor`, in which case it acts as all weights equal to " + "1. 
Received ", + weights.shape().DebugString())); + Tidx size = size_t.scalar()(); OP_REQUIRES( ctx, size >= 0, @@ -369,7 +378,8 @@ class SparseBincountOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& indices = ctx->input(0); - const auto values = ctx->input(1).flat(); + const Tensor& values = ctx->input(1); + const auto values_flat = values.flat(); const Tensor& dense_shape = ctx->input(2); const Tensor& size_t = ctx->input(3); const auto weights = ctx->input(4).flat(); @@ -382,6 +392,9 @@ class SparseBincountOp : public OpKernel { OP_REQUIRES( ctx, size >= 0, errors::InvalidArgument("size (", size, ") must be non-negative")); + OP_REQUIRES_OK(ctx, sparse_utils::ValidateSparseTensor( + indices, values, dense_shape, + sparse_utils::IndexValidation::kUnordered)); bool is_1d = dense_shape.NumElements() == 1; @@ -394,11 +407,11 @@ class SparseBincountOp : public OpKernel { if (binary_output_) { OP_REQUIRES_OK(ctx, functor::BincountFunctor::Compute( - ctx, values, weights, out, size)); + ctx, values_flat, weights, out, size)); } else { OP_REQUIRES_OK( ctx, functor::BincountFunctor::Compute( - ctx, values, weights, out, size)); + ctx, values_flat, weights, out, size)); } } else { const auto shape = dense_shape.flat(); @@ -410,7 +423,7 @@ class SparseBincountOp : public OpKernel { const auto indices_mat = indices.matrix(); for (int64_t i = 0; i < indices_mat.dimension(0); ++i) { const int64_t batch = indices_mat(i, 0); - const Tidx bin = values(i); + const Tidx bin = values_flat(i); OP_REQUIRES( ctx, batch < out.dimension(0), errors::InvalidArgument("Index out of bound. `batch` (", batch, @@ -480,6 +493,9 @@ class RaggedBincountOp : public OpKernel { int num_values = values.size(); int batch_idx = 0; + OP_REQUIRES(ctx, splits.size() > 0, + errors::InvalidArgument("Splits must be non-empty")); + OP_REQUIRES(ctx, splits(0) == 0, errors::InvalidArgument("Splits must start with 0, not with ", splits(0))); diff --git a/tensorflow/core/kernels/candidate_sampler_ops.cc b/tensorflow/core/kernels/candidate_sampler_ops.cc index 872e805873f4ec..94eb7f2738eb53 100644 --- a/tensorflow/core/kernels/candidate_sampler_ops.cc +++ b/tensorflow/core/kernels/candidate_sampler_ops.cc @@ -73,6 +73,14 @@ class BaseCandidateSamplerOp : public OpKernel { gtl::ArraySlice true_candidate( true_classes.matrix().data(), batch_size * num_true_); + + for (const auto& candidate : true_candidate) { + OP_REQUIRES(context, candidate >= 0 && candidate < sampler_->range(), + errors::InvalidArgument("`true_candidate` out of range [", 0, + ", ", sampler_->range(), + "), received ", candidate)); + } + gtl::MutableArraySlice sampled_candidate( out_sampled_candidates->vec().data(), num_sampled_); gtl::MutableArraySlice true_expected_count( diff --git a/tensorflow/core/kernels/collective_ops.cc b/tensorflow/core/kernels/collective_ops.cc index 792e2a2479ad1a..4f620050c0323d 100644 --- a/tensorflow/core/kernels/collective_ops.cc +++ b/tensorflow/core/kernels/collective_ops.cc @@ -176,6 +176,10 @@ class CollectiveGatherOpKernel : public CollectiveOpV1Kernel { void ComputeAsyncImpl(OpKernelContext* c, CollectiveExecutor* col_exec, DoneCallback done) override { auto output_shape = c->input(0).shape(); + OP_REQUIRES_ASYNC(c, output_shape.dims() > 0, + errors::InvalidArgument("input should have rank > 0, ", + "recieved ", output_shape.dims()), + done); output_shape.set_dim( 0, output_shape.dim_size(0) * col_params_->group.group_size); col_params_->instance.shape = output_shape; diff --git 
a/tensorflow/core/kernels/composite_tensor_ops.cc b/tensorflow/core/kernels/composite_tensor_ops.cc index f41b02991bba43..4e689d27d5acba 100644 --- a/tensorflow/core/kernels/composite_tensor_ops.cc +++ b/tensorflow/core/kernels/composite_tensor_ops.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_encode_decode.h" #include "tensorflow/core/kernels/composite_tensor_variant.h" @@ -66,7 +67,16 @@ class CompositeTensorVariantToComponents : public OpKernel { void Compute(OpKernelContext* context) override { Tensor encoded_t = context->input(0); + OP_REQUIRES( + context, encoded_t.flat().size() > 0, + errors::InvalidArgument("Input `encoded` must not be an empty variant " + "tensor, but got ", + encoded_t.DebugString())); auto* encoded = encoded_t.flat()(0).get(); + OP_REQUIRES(context, encoded != nullptr, + errors::InvalidArgument("The input `encoded` is not a valid " + "CompositeTensorVariant tensor, got ", + encoded_t.DebugString())); // Check that the encoded TypeSpec is compatible with the expected TypeSpec. // For now, we just check that the class matches. diff --git a/tensorflow/core/kernels/conv_grad_input_ops.h b/tensorflow/core/kernels/conv_grad_input_ops.h index 88df14d395c73d..6fdd72e0014b07 100644 --- a/tensorflow/core/kernels/conv_grad_input_ops.h +++ b/tensorflow/core/kernels/conv_grad_input_ops.h @@ -37,6 +37,7 @@ limitations under the License. #include "tensorflow/core/kernels/conv_2d.h" #include "tensorflow/core/kernels/conv_grad_ops.h" #include "tensorflow/core/kernels/conv_grad_shape_utils.h" +#include "tensorflow/core/kernels/fill_functor.h" #ifdef TENSORFLOW_USE_LIBXSMM_CONVOLUTIONS #include "tensorflow/core/kernels/xsmm_conv2d.h" #endif @@ -421,6 +422,11 @@ class Conv2DBackpropInputOp : public OpKernel { const Tensor& filter = context->input(1); const Tensor& out_backprop = context->input(2); + OP_REQUIRES( + context, out_backprop.dims() == 4, + errors::InvalidArgument("input_sizes must be 4-dimensional, got: ", + out_backprop.dims())); + TensorShape input_shape; OP_REQUIRES_OK(context, Conv2DBackpropComputeInputShape(input_sizes, filter.shape(), @@ -436,6 +442,15 @@ class Conv2DBackpropInputOp : public OpKernel { return; } + // If shapes are valid but `out_backprop` is empty, in_backprop should be + // set to all zeros. Otherwise, cudnn/dnnl fail with an empty input. + if (out_backprop.NumElements() == 0) { + functor::SetZeroFunctor set_zero; + set_zero(context->eigen_device(), + in_backprop->template flat()); + return; + } + // For now we take the stride from the second and third dimensions only (we // do not support striding on the batch or depth dimension). const int stride_rows = GetTensorDim(strides_, data_format_, 'H'); @@ -517,6 +532,10 @@ class Conv2DCustomBackpropInputOp : public OpKernel { const Tensor& input_sizes = context->input(0); const Tensor& filter = context->input(1); const Tensor& out_backprop = context->input(2); + OP_REQUIRES( + context, out_backprop.dims() == 4, + errors::InvalidArgument("input_sizes must be 4-dimensional, got: ", + out_backprop.dims())); TensorShape input_shape; OP_REQUIRES_OK(context, @@ -554,6 +573,15 @@ class Conv2DCustomBackpropInputOp : public OpKernel { return; } + // If shapes are valid but `out_backprop` is empty, in_backprop should be + // set to all zeros. 
Otherwise, cudnn/dnnl fail with an empty input. + if (out_backprop.NumElements() == 0) { + functor::SetZeroFunctor set_zero; + set_zero(context->eigen_device(), + in_backprop->template flat()); + return; + } + // TODO(ezhulenev): Remove custom kernel and move XSMM support to // LaunchConv2DBackpropInputOp functor. #if defined TENSORFLOW_USE_LIBXSMM_CONVOLUTIONS && \ diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc index 67418151a1cf2d..44caed29252057 100644 --- a/tensorflow/core/kernels/conv_ops.cc +++ b/tensorflow/core/kernels/conv_ops.cc @@ -43,6 +43,7 @@ limitations under the License. #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/conv_2d.h" #include "tensorflow/core/kernels/deep_conv2d.h" +#include "tensorflow/core/kernels/fill_functor.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -700,6 +701,15 @@ class Conv2DOp : public BinaryOp { return; } + // If the input is empty, result can only be due to padding. + if (input.NumElements() == 0) { + // Zero-out output and return. + functor::SetZeroFunctor()(context->eigen_device(), + output->template flat()); + + return; + } + #ifdef TENSORFLOW_USE_LIBXSMM_CONVOLUTIONS if (params_.padding != EXPLICIT && LaunchXsmmConvOp::Run( diff --git a/tensorflow/core/kernels/conv_ops_fused_image_transform.cc b/tensorflow/core/kernels/conv_ops_fused_image_transform.cc index 2d8feb313a30f4..dc5fd97103cc38 100644 --- a/tensorflow/core/kernels/conv_ops_fused_image_transform.cc +++ b/tensorflow/core/kernels/conv_ops_fused_image_transform.cc @@ -667,8 +667,11 @@ class FusedResizeConv2DUsingGemmOp : public OpKernel { st.height_scale = 1.0f; st.width_scale = 1.0f; } - TensorShape resized_shape( - {input.dim_size(0), st.out_height, st.out_width, input.dim_size(3)}); + TensorShape resized_shape; + OP_REQUIRES_OK(context, TensorShape::BuildTensorShape( + {input.dim_size(0), st.out_height, st.out_width, + input.dim_size(3)}, + &resized_shape)); int paddings_index; int filter_index; if (DoResize) { diff --git a/tensorflow/core/kernels/cwise_ops_common.h b/tensorflow/core/kernels/cwise_ops_common.h index cdeb62cbeddbc8..9bdc107f9f580f 100644 --- a/tensorflow/core/kernels/cwise_ops_common.h +++ b/tensorflow/core/kernels/cwise_ops_common.h @@ -450,13 +450,15 @@ struct BinaryFunctor { Assign(d, out, in.unaryExpr(Unary(scalar.data()))); } - inline Eigen::IndexList> NByOne(int n) { - Eigen::IndexList> ret; + inline Eigen::IndexList> NByOne( + Eigen::DenseIndex n) { + Eigen::IndexList> ret; ret.set(0, n); return ret; } - inline Eigen::IndexList, int> OneByM(int m) { - Eigen::IndexList, int> ret; + inline Eigen::IndexList, Eigen::DenseIndex> OneByM( + Eigen::DenseIndex m) { + Eigen::IndexList, Eigen::DenseIndex> ret; ret.set(1, m); return ret; } @@ -487,10 +489,10 @@ struct BinaryFunctor { // use_broadcast_optimization are compile-time constant, gcc // does a decent job avoiding generating code when conditions // are not met. 
- const int a = in0.dimension(0); // in0 is shape [a, b] - const int b = in0.dimension(1); - const int c = in1.dimension(0); // in1 is shape [c, d] - const int d = in1.dimension(1); + const Eigen::DenseIndex a = in0.dimension(0); // in0 is shape [a, b] + const Eigen::DenseIndex b = in0.dimension(1); + const Eigen::DenseIndex c = in1.dimension(0); // in1 is shape [c, d] + const Eigen::DenseIndex d = in1.dimension(1); if ((a == 1) && (d == 1)) { auto lhs = in0.reshape(OneByM(b)).broadcast(NByOne(c)); auto rhs = in1.reshape(NByOne(c)).broadcast(OneByM(b)); diff --git a/tensorflow/core/kernels/fake_quant_ops.cc b/tensorflow/core/kernels/fake_quant_ops.cc index aa59213c67d81a..682459866e9885 100644 --- a/tensorflow/core/kernels/fake_quant_ops.cc +++ b/tensorflow/core/kernels/fake_quant_ops.cc @@ -24,6 +24,7 @@ limitations under the License. // Above is the related header but clang tidy doesn't recognize it. #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/monitoring/gauge.h" #include "tensorflow/core/platform/protobuf.h" @@ -205,6 +206,13 @@ class FakeQuantWithMinMaxVarsOp : public OpKernel { const Tensor& min = context->input(1); const Tensor& max = context->input(2); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(min.shape()), + InvalidArgument("`min` must be rank 0 but is rank ", min.dims())); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(max.shape()), + InvalidArgument("`max` must be rank 0 but is rank ", max.dims())); + Tensor* output; OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); @@ -253,6 +261,12 @@ class FakeQuantWithMinMaxVarsGradientOp : public OpKernel { InvalidArgument("gradient and input must be the same size")); const Tensor& min = context->input(2); const Tensor& max = context->input(3); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(min.shape()), + InvalidArgument("`min` must be rank 0 but is rank ", min.dims())); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(max.shape()), + InvalidArgument("`max` must be rank 0 but is rank ", max.dims())); Tensor* grad_wrt_input; OP_REQUIRES_OK(context, @@ -342,10 +356,17 @@ class FakeQuantWithMinMaxVarsPerChannelOp : public OpKernel { const Tensor& input = context->input(0); const int depth = input.dim_size(input.dims() - 1); // last dimension size. const Tensor& min = context->input(1); + const Tensor& max = context->input(2); + + OP_REQUIRES( + context, TensorShapeUtils::IsVector(min.shape()), + InvalidArgument("`min` must be rank 1 but is rank ", min.dims())); OP_REQUIRES(context, min.dim_size(0) == depth, InvalidArgument("min has incorrect size, expected ", depth, " was ", min.dim_size(0))); - const Tensor& max = context->input(2); + OP_REQUIRES( + context, TensorShapeUtils::IsVector(max.shape()), + InvalidArgument("`max` must be rank 1 but is rank ", max.dims())); OP_REQUIRES(context, max.dim_size(0) == depth, InvalidArgument("max has incorrect size, expected ", depth, " was ", max.dim_size(0))); @@ -399,10 +420,16 @@ class FakeQuantWithMinMaxVarsPerChannelGradientOp : public OpKernel { InvalidArgument("gradient and input must be the same size")); const int depth = input.dim_size(input.dims() - 1); // last dimension size. 
const Tensor& min = context->input(2); + OP_REQUIRES( + context, TensorShapeUtils::IsVector(min.shape()), + InvalidArgument("`min` must be rank 1 but is rank ", min.dims())); OP_REQUIRES(context, min.dim_size(0) == depth, InvalidArgument("min has incorrect size, expected ", depth, " was ", min.dim_size(0))); const Tensor& max = context->input(3); + OP_REQUIRES( + context, TensorShapeUtils::IsVector(max.shape()), + InvalidArgument("`max` must be rank 1 but is rank ", max.dims())); OP_REQUIRES(context, max.dim_size(0) == depth, InvalidArgument("max has incorrect size, expected ", depth, " was ", max.dim_size(0))); diff --git a/tensorflow/core/kernels/fractional_avg_pool_op.cc b/tensorflow/core/kernels/fractional_avg_pool_op.cc index b3e65aeaee22f8..3bb206866089a0 100644 --- a/tensorflow/core/kernels/fractional_avg_pool_op.cc +++ b/tensorflow/core/kernels/fractional_avg_pool_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ + #define EIGEN_USE_THREADS #include @@ -19,15 +20,15 @@ limitations under the License. #include #include -#include "tensorflow/core/kernels/fractional_pool_common.h" - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/kernels/fractional_pool_common.h" #include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/util/guarded_philox_random.h" +#include "tensorflow/core/util/overflow.h" namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -43,6 +44,12 @@ class FractionalAvgPoolOp : public OpKernel { OP_REQUIRES(context, pooling_ratio_.size() == 4, errors::InvalidArgument( "pooling_ratio field must specify 4 dimensions")); + for (std::size_t i = 0; i < pooling_ratio_.size(); ++i) { + OP_REQUIRES(context, pooling_ratio_[i] >= 1, + errors::InvalidArgument( + "pooling_ratio cannot be smaller than 1, got: ", + pooling_ratio_[i])); + } OP_REQUIRES( context, pooling_ratio_[0] == 1 || pooling_ratio_[3] == 1, errors::Unimplemented("Fractional average pooling is not yet " @@ -81,9 +88,11 @@ class FractionalAvgPoolOp : public OpKernel { for (int i = 0; i < tensor_in_and_out_dims; ++i) { input_size[i] = tensor_in.dim_size(i); OP_REQUIRES( - context, pooling_ratio_[i] <= input_size[i], - errors::InvalidArgument( - "Pooling ratio cannot be bigger than input tensor dim size.")); + context, input_size[i] >= pooling_ratio_[i], + errors::InvalidArgument("Pooling ratio is higher than input " + "dimension size for dimension ", + i, ". Input dim size: ", input_size[i], + " pooling ratio: ", pooling_ratio_[i])); } // Output size. 
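Editor's note: the fractional-average-pooling hunk above adds two bounds on `pooling_ratio`: each entry must be at least 1, and none may exceed the matching input dimension. A standalone sketch of the same validation, assuming toy types and names:

#include <cstddef>
#include <cstdio>
#include <vector>

// Sketch of the two pooling_ratio validations added above, on plain vectors:
// every ratio must be at least 1, and no ratio may exceed the corresponding
// input dimension, otherwise the derived output size collapses to zero or
// exceeds the input. Names are illustrative, not the kernel's.
bool ValidatePoolingRatio(const std::vector<float>& pooling_ratio,
                          const std::vector<long long>& input_size) {
  if (pooling_ratio.size() != 4 || input_size.size() != 4) {
    std::fprintf(stderr, "pooling_ratio and input must have 4 dimensions\n");
    return false;
  }
  for (std::size_t i = 0; i < pooling_ratio.size(); ++i) {
    if (pooling_ratio[i] < 1) {
      std::fprintf(stderr, "pooling_ratio cannot be smaller than 1, got %f\n",
                   pooling_ratio[i]);
      return false;
    }
    if (static_cast<float>(input_size[i]) < pooling_ratio[i]) {
      std::fprintf(stderr,
                   "pooling ratio %f is higher than input dim size %lld\n",
                   pooling_ratio[i], input_size[i]);
      return false;
    }
  }
  return true;
}

int main() {
  // A ratio below 1 in dimension 1 passed the old check (which only compared
  // against the input size) but now fails up front.
  ValidatePoolingRatio({1.0f, 0.5f, 2.0f, 1.0f}, {1, 4, 4, 3});
}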
for (int i = 0; i < tensor_in_and_out_dims; ++i) { @@ -241,7 +250,32 @@ class FractionalAvgPoolGradOp : public OpKernel { orig_input_tensor_shape.NumElements() == 4, errors::InvalidArgument("original input tensor shape must be" "1-dimensional and 4 elements")); + int64_t num_elements = 1; + for (int i = 0; i < orig_input_tensor_shape.dims(); i++) { + OP_REQUIRES(context, orig_input_tensor_shape.dim_size(i) > 0, + errors::InvalidArgument( + "orig_input_tensor_shape must be positive, got: ", + orig_input_tensor_shape.dim_size(i))); + num_elements = MultiplyWithoutOverflow( + num_elements, orig_input_tensor_shape.dim_size(i)); + OP_REQUIRES( + context, num_elements > 0, + errors::InvalidArgument( + "The total elements specified by orig_input_tensor_shape", + " is too large. Encountered overflow after multiplying ", + orig_input_tensor_shape.dim_size(i), ", result: ", num_elements)); + } + const Tensor& out_backprop = context->input(1); + OP_REQUIRES(context, out_backprop.dims() == 4, + errors::InvalidArgument("out_backprop must be 4-dimensional")); + for (int i = 0; i < out_backprop.dims(); i++) { + OP_REQUIRES(context, out_backprop.dim_size(i) > 0, + errors::InvalidArgument( + "out_backprop must be positive for all dimension, got:", + out_backprop.dim_size(i))); + } + const Tensor& row_seq_tensor = context->input(2); const Tensor& col_seq_tensor = context->input(3); diff --git a/tensorflow/core/kernels/fractional_max_pool_op.cc b/tensorflow/core/kernels/fractional_max_pool_op.cc index 0722c408fba9d4..ec08b5c5028727 100644 --- a/tensorflow/core/kernels/fractional_max_pool_op.cc +++ b/tensorflow/core/kernels/fractional_max_pool_op.cc @@ -19,12 +19,13 @@ limitations under the License. #include #include -#include "tensorflow/core/kernels/fractional_pool_common.h" - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" +#include "tensorflow/core/kernels/fractional_pool_common.h" #include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/util/guarded_philox_random.h" @@ -44,6 +45,12 @@ class FractionalMaxPoolOp : public OpKernel { OP_REQUIRES(context, pooling_ratio_.size() == 4, errors::InvalidArgument("pooling_ratio field must " "specify 4 dimensions")); + for (std::size_t i = 0; i < pooling_ratio_.size(); ++i) { + OP_REQUIRES(context, pooling_ratio_[i] >= 1, + errors::InvalidArgument( + "pooling_ratio cannot be smaller than 1, got: ", + pooling_ratio_[i])); + } OP_REQUIRES( context, pooling_ratio_[0] == 1 || pooling_ratio_[3] == 1, @@ -257,6 +264,18 @@ class FractionalMaxPoolGradOp : public OpKernel { OP_REQUIRES(context, tensor_out.NumElements() > 0, errors::InvalidArgument("orig_output must not be empty, got ", tensor_out.DebugString())); + OP_REQUIRES( + context, + height_seq_tensor.NumElements() * width_seq_tensor.NumElements() <= + tensor_in.NumElements(), + errors::InvalidArgument( + "Pooling region has more elements than the input tensor. 
" + "row_pooling_sequence: ", + height_seq_tensor.DebugString(), + "col_pooling_sequence: ", width_seq_tensor.DebugString(), + "orig_input: ", tensor_in.DebugString())); + + // std::vector input_size(tensor_in_and_out_dims); std::vector output_size(tensor_in_and_out_dims); for (int i = 0; i < tensor_in_and_out_dims; ++i) { @@ -352,7 +371,9 @@ class FractionalMaxPoolGradOp : public OpKernel { output_size[2] * output_size[1] * output_size[0]; for (int64_t i = 0; i < num_reshaped_cols; ++i) { for (int64_t j = 0; j < output_size[3]; ++j) { - DCHECK_EQ(tensor_out_dup_mat(j, i), tensor_out_mat(j, i)); + OP_REQUIRES(context, tensor_out_dup_mat(j, i) == tensor_out_mat(j, i), + errors::InvalidArgument( + "tensor_out_dup is not the same as tensor_out")); } } @@ -369,11 +390,12 @@ class FractionalMaxPoolGradOp : public OpKernel { for (int index = 0; index < num_total_outputs; ++index) { int input_backprop_index = out_arg_max_flat(index); - // According to maxpooling_op.cc, the performance impact below is small. - CHECK(input_backprop_index >= 0 && - input_backprop_index < num_total_inputs) - << "Invalid input backprop index: " << input_backprop_index << ", " - << num_total_inputs; + OP_REQUIRES( + context, + input_backprop_index >= 0 && input_backprop_index < num_total_inputs, + errors::InvalidArgument( + "Invalid input backprop index: ", input_backprop_index, ", ", + num_total_inputs)); input_backprop_flat(input_backprop_index) += out_backprop_flat(index); } } diff --git a/tensorflow/core/kernels/image/draw_bounding_box_op.cc b/tensorflow/core/kernels/image/draw_bounding_box_op.cc index 0ce1f3fa8a8291..d1a5b59146fde1 100644 --- a/tensorflow/core/kernels/image/draw_bounding_box_op.cc +++ b/tensorflow/core/kernels/image/draw_bounding_box_op.cc @@ -119,7 +119,7 @@ class DrawBoundingBoxesOp : public OpKernel { for (int64_t b = 0; b < batch_size; ++b) { const int64_t num_boxes = boxes.dim_size(1); - const auto tboxes = boxes.tensor(); + const auto tboxes = boxes.tensor(); for (int64_t bb = 0; bb < num_boxes; ++bb) { int64_t color_index = bb % color_table.size(); const int64_t min_box_row = diff --git a/tensorflow/core/kernels/image/generate_box_proposals_op.cu.cc b/tensorflow/core/kernels/image/generate_box_proposals_op.cu.cc index a12cd3e6601fcd..80dc57377be1a5 100644 --- a/tensorflow/core/kernels/image/generate_box_proposals_op.cu.cc +++ b/tensorflow/core/kernels/image/generate_box_proposals_op.cu.cc @@ -312,6 +312,22 @@ class GenerateBoundingBoxProposals : public tensorflow::OpKernel { const auto bbox_deltas = context->input(1); const auto image_info = context->input(2); const auto anchors = context->input(3); + + OP_REQUIRES(context, scores.dims() == 4, + errors::InvalidArgument("`scores` must be rank 4 but is rank ", + scores.dims())); + OP_REQUIRES( + context, bbox_deltas.dims() == 4, + errors::InvalidArgument("`bbox_deltas` must be rank 4 but is rank ", + bbox_deltas.dims())); + OP_REQUIRES( + context, image_info.dims() == 2, + errors::InvalidArgument("`image_info` must be rank 2 but is rank ", + image_info.dims())); + OP_REQUIRES(context, anchors.dims() == 3, + errors::InvalidArgument("`anchors` must be rank 3 but is rank ", + anchors.dims())); + const auto num_images = scores.dim_size(0); const auto num_anchors = scores.dim_size(3); const auto height = scores.dim_size(1); diff --git a/tensorflow/core/kernels/image/image_ops.cc b/tensorflow/core/kernels/image/image_ops.cc index c2e769f146c761..113a9b2af9a822 100644 --- a/tensorflow/core/kernels/image/image_ops.cc +++ 
b/tensorflow/core/kernels/image/image_ops.cc @@ -96,11 +96,12 @@ void DoImageProjectiveTransformOp(OpKernelContext* ctx, } Tensor* output_t; + TensorShape output_shape; OP_REQUIRES_OK( - ctx, ctx->allocate_output(0, - TensorShape({images_t.dim_size(0), out_height, - out_width, images_t.dim_size(3)}), - &output_t)); + ctx, TensorShape::BuildTensorShape({images_t.dim_size(0), out_height, + out_width, images_t.dim_size(3)}, + &output_shape)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, output_shape, &output_t)); auto output = output_t->tensor(); auto images = images_t.tensor(); auto transform = transform_t.matrix(); diff --git a/tensorflow/core/kernels/image/mirror_pad_op.cc b/tensorflow/core/kernels/image/mirror_pad_op.cc index 9b9ba452517118..b4bf3b3997513f 100644 --- a/tensorflow/core/kernels/image/mirror_pad_op.cc +++ b/tensorflow/core/kernels/image/mirror_pad_op.cc @@ -297,13 +297,21 @@ class MirrorPadGradOp : public OpKernel { TensorShape output_shape; typename TTypes::ConstMatrix paddings = in1.matrix(); for (int d = 0; d < dims; ++d) { - const Tpaddings before = paddings(d, 0); // Pad before existing elements. - const Tpaddings after = paddings(d, 1); // Pad after existing elements. + const int64_t before = paddings(d, 0); // Pad before existing elements. + const int64_t after = paddings(d, 1); // Pad after existing elements. OP_REQUIRES(context, before >= 0 && after >= 0, errors::InvalidArgument( "Paddings must be non-negative: ", before, ", ", after)); - const int64_t out_size = in0.dim_size(d) - (before + after); + const int64_t in_size = in0.dim_size(d); + const int64_t total_padding = before + after; + OP_REQUIRES( + context, total_padding < in_size && total_padding >= 0, + errors::InvalidArgument( + "Total paddings must be less than the input dimension size: ", + total_padding, " was not less than ", in_size)); + + const int64_t out_size = in_size - total_padding; if (offset_ == 0) { // SYMMETRIC mode. OP_REQUIRES(context, before <= out_size && after <= out_size, errors::InvalidArgument("paddings must be no greater " diff --git a/tensorflow/core/kernels/image/resize_nearest_neighbor_op.cc b/tensorflow/core/kernels/image/resize_nearest_neighbor_op.cc index a54b60f0099a0b..0d0e0cbdbe639e 100644 --- a/tensorflow/core/kernels/image/resize_nearest_neighbor_op.cc +++ b/tensorflow/core/kernels/image/resize_nearest_neighbor_op.cc @@ -257,11 +257,11 @@ class ResizeNearestNeighborOpGrad : public OpKernel { const int64_t out_width = sizes(1); Tensor* output = nullptr; - OP_REQUIRES_OK( - context, - context->allocate_output( - 0, TensorShape({batch_size, out_height, out_width, channels}), - &output)); + TensorShape shape; + OP_REQUIRES_OK(context, + TensorShape::BuildTensorShape( + {batch_size, out_height, out_width, channels}, &shape)); + OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output)); // Return if the output is empty. if (output->NumElements() == 0) return; diff --git a/tensorflow/core/kernels/linalg/linalg_ops_common.cc b/tensorflow/core/kernels/linalg/linalg_ops_common.cc index bb55f7de0011a9..676111f4bf14d6 100644 --- a/tensorflow/core/kernels/linalg/linalg_ops_common.cc +++ b/tensorflow/core/kernels/linalg/linalg_ops_common.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/kernels/linalg/linalg_ops_common.h" +#include #include #include "third_party/eigen3/Eigen/Core" @@ -22,7 +23,9 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" @@ -152,6 +155,10 @@ void LinearAlgebraOp::AnalyzeInputs( input_matrix_shapes->emplace_back( std::initializer_list({num_rows, num_cols})); inputs->emplace_back(&in); + OP_REQUIRES( + context, in.dtype() == DataTypeToEnum::v(), + errors::InvalidArgument("Invalid input dtype ", in.dtype(), " vs ", + DataTypeToEnum::v())); } // Have the derived class validate that the inputs are as expected. ValidateInputMatrixShapes(context, *input_matrix_shapes); @@ -212,6 +219,11 @@ void LinearAlgebraOp::PrepareOutputs( OP_REQUIRES_OK(context, context->allocate_output( output_idx, output_tensor_shape, &out)); } + OP_REQUIRES( + context, out->dtype() == DataTypeToEnum::v(), + errors::InvalidArgument("Invalid output dtype ", out->dtype(), " vs ", + DataTypeToEnum::v())); + outputs->emplace_back(out); } } diff --git a/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc b/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc index a3532f765a414b..6168baac069d68 100644 --- a/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/linalg/svd_op_gpu.cu.cc @@ -395,6 +395,12 @@ class SvdOpGpu : public AsyncOpKernel { OP_REQUIRES_OK_ASYNC(context, context->allocate_output(2, shapeV, &outputV), done); + // If there are zero batches, we are done. + if (shapeRaw.num_elements() == 0) { + done(); + return; + } + if (n == 0 || m == 0) { if (n == m || !compute_uv_ || !full_matrices_) { // S, U, and V are all empty. Nothing to do. diff --git a/tensorflow/core/kernels/list_kernels.cc b/tensorflow/core/kernels/list_kernels.cc index 5f7943a9bad044..64055adf1e2ee3 100644 --- a/tensorflow/core/kernels/list_kernels.cc +++ b/tensorflow/core/kernels/list_kernels.cc @@ -21,19 +21,21 @@ limitations under the License. 
#include "tensorflow/core/kernels/list_kernels.h" +#include +#include #include +#include +#include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_op_registry.h" -#include "tensorflow/core/kernels/concat_lib.h" -#include "tensorflow/core/lib/core/coding.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/util/util.h" +#include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -49,6 +51,9 @@ Status TensorShapeFromTensor(const Tensor& t, PartialTensorShape* out) { return errors::InvalidArgument( "The only valid scalar shape tensor is the fully unknown shape " "specified as -1."); + } else if (t.shape().dims() != 1) { + return errors::InvalidArgument("Shape must be at most rank 1 but is rank ", + t.shape().dims()); } if (t.dtype() == DT_INT32) { return PartialTensorShape::MakePartialShape(t.vec().data(), @@ -319,6 +324,11 @@ class TensorListReserve : public OpKernel { void Compute(OpKernelContext* c) override { PartialTensorShape element_shape; OP_REQUIRES_OK(c, TensorShapeFromTensor(c->input(0), &element_shape)); + OP_REQUIRES( + c, TensorShapeUtils::IsScalar(c->input(1).shape()), + errors::InvalidArgument( + "The num_elements to reserve must be a tensor size 1, but got ", + c->input(1).shape())); int32_t num_elements = c->input(1).scalar()(); OP_REQUIRES(c, num_elements >= 0, errors::InvalidArgument("The num_elements to reserve must be a " @@ -365,6 +375,8 @@ class TensorListResize : public OpKernel { void Compute(OpKernelContext* c) override { const TensorList* input_list = nullptr; OP_REQUIRES_OK(c, GetInputList(c, 0, &input_list)); + OP_REQUIRES(c, TensorShapeUtils::IsScalar(c->input(1).shape()), + errors::InvalidArgument("size must be a scalar")); int32_t size = c->input(1).scalar()(); OP_REQUIRES( c, size >= 0, diff --git a/tensorflow/core/kernels/list_kernels.h b/tensorflow/core/kernels/list_kernels.h index 29582c593942e9..32760219241827 100644 --- a/tensorflow/core/kernels/list_kernels.h +++ b/tensorflow/core/kernels/list_kernels.h @@ -393,8 +393,11 @@ class TensorListConcat : public OpKernel { void Compute(OpKernelContext* c) override { PartialTensorShape element_shape_except_first_dim; if (!element_shape_.unknown_rank()) { - element_shape_except_first_dim = PartialTensorShape( - gtl::ArraySlice(element_shape_.dim_sizes()).subspan(1)); + auto dim_sizes = element_shape_.dim_sizes(); + OP_REQUIRES(c, !dim_sizes.empty(), + errors::InvalidArgument("element_shape must not be empty")); + element_shape_except_first_dim = + PartialTensorShape(gtl::ArraySlice(dim_sizes).subspan(1)); } // Check that the input Variant tensor is indeed a TensorList and has the // correct element type. 
@@ -768,6 +771,11 @@ class TensorListFromTensor : public OpKernel { attr.set_on_host(true); OP_REQUIRES_OK(c, c->allocate_output(0, {}, &output_tensor, attr)); PartialTensorShape element_shape; + OP_REQUIRES( + c, !TensorShapeUtils::IsMatrixOrHigher(c->input(1).shape()), + errors::InvalidArgument( + "TensorListFromTensor: element_shape must be at most rank 1 but ", + "has the shape of ", c->input(1).shape().DebugString())); OP_REQUIRES_OK(c, TensorShapeFromTensor(c->input(1), &element_shape)); TensorList output_list; const Tensor& t = c->input(0); @@ -894,10 +902,20 @@ class TensorListScatter : public OpKernel { OP_REQUIRES_OK(c, c->allocate_output(0, {}, &output_tensor, attr)); Tensor indices = c->input(1); PartialTensorShape element_shape; + OP_REQUIRES( + c, !TensorShapeUtils::IsMatrixOrHigher(c->input(2).shape()), + errors::InvalidArgument( + "TensorListScatter: element_shape must be at most rank 1 but has ", + "the shape of ", c->input(2).shape().DebugString())); OP_REQUIRES_OK(c, TensorShapeFromTensor(c->input(2), &element_shape)); // TensorListScatterV2 passes the num_elements input, TensorListScatter does // not. - int num_elements = c->num_inputs() >= 4 ? c->input(3).scalar()() : -1; + int num_elements = -1; + if (c->num_inputs() >= 4) { + OP_REQUIRES(c, TensorShapeUtils::IsScalar(c->input(3).shape()), + errors::InvalidArgument("num_elements must be a scalar")); + num_elements = c->input(3).scalar()(); + } OP_REQUIRES(c, num_elements >= -1, errors::InvalidArgument( "TensorListScatter expects num_elements >= -1, found: ", diff --git a/tensorflow/core/kernels/lrn_op.cc b/tensorflow/core/kernels/lrn_op.cc index 31aaf018329b52..0d6d24d3dce2df 100644 --- a/tensorflow/core/kernels/lrn_op.cc +++ b/tensorflow/core/kernels/lrn_op.cc @@ -668,7 +668,8 @@ class LRNGradOp : public OpKernel { in_image.dim_size(0) == batch && in_image.dim_size(1) == rows && in_image.dim_size(2) == cols && in_image.dim_size(3) == depth && out_image.dim_size(0) == batch && out_image.dim_size(1) == rows && - out_image.dim_size(2) == cols && out_image.dim_size(3) == depth, + out_image.dim_size(2) == cols && out_image.dim_size(3) == depth && + out_image.dims() == 4, errors::InvalidArgument( "input_grads, input_image, and out_image should have the same " "shape")); diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index 9edd5cf6a6d52b..4fb198690d5ad2 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -1268,6 +1268,13 @@ class MaxPoolingNoMaskOp : public OpKernel { ShapeFromFormat(data_format_, params.tensor_in_batch, params.out_height, params.out_width, params.depth); + // Degenerate pooling output should return an empty tensor. + if (out_shape.num_elements() == 0) { + Tensor* output = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); + return; + } + // Assuming qint8 <--> NCHW_VECT_C (int8x4) here. 
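Editor's note: the LRN gradient hunk above adds `out_image.dims() == 4` alongside the per-dimension comparisons, since indexing dimension 3 of a lower-rank tensor is itself out of range. The toy sketch below shows the combined rank-plus-dimensions check; names are illustrative only.

#include <cstdio>
#include <vector>

using Shape = std::vector<long long>;

// Compare two shapes as NHWC tensors: verify rank 4 first, then each
// dimension, so no out-of-range dimension index is ever read.
bool SameNHWCShape(const Shape& a, const Shape& b) {
  if (a.size() != 4 || b.size() != 4) return false;  // rank check first
  for (int i = 0; i < 4; ++i) {
    if (a[i] != b[i]) return false;
  }
  return true;
}

int main() {
  Shape grads = {2, 8, 8, 3};
  Shape out_image = {2, 8, 8};  // rank 3: rejected rather than read past end
  std::printf("match: %s\n", SameNHWCShape(grads, out_image) ? "yes" : "no");
}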
constexpr bool is_int8x4 = std::is_same::value; OP_REQUIRES(context, (is_int8x4 == (data_format_ == FORMAT_NCHW_VECT_C)), diff --git a/tensorflow/core/kernels/mfcc.cc b/tensorflow/core/kernels/mfcc.cc index 8c755e0df87546..cb4416f7bd3092 100644 --- a/tensorflow/core/kernels/mfcc.cc +++ b/tensorflow/core/kernels/mfcc.cc @@ -38,8 +38,10 @@ bool Mfcc::Initialize(int input_length, double input_sample_rate) { bool initialized = mel_filterbank_.Initialize( input_length, input_sample_rate, filterbank_channel_count_, lower_frequency_limit_, upper_frequency_limit_); - initialized &= - dct_.Initialize(filterbank_channel_count_, dct_coefficient_count_); + if (initialized) { + initialized = + dct_.Initialize(filterbank_channel_count_, dct_coefficient_count_); + } initialized_ = initialized; return initialized; } diff --git a/tensorflow/core/kernels/mfcc_mel_filterbank.cc b/tensorflow/core/kernels/mfcc_mel_filterbank.cc index 8eb2d9d8309f50..c5c2d29d37b99d 100644 --- a/tensorflow/core/kernels/mfcc_mel_filterbank.cc +++ b/tensorflow/core/kernels/mfcc_mel_filterbank.cc @@ -32,6 +32,8 @@ limitations under the License. #include +#include + #include "tensorflow/core/platform/logging.h" namespace tensorflow { @@ -74,7 +76,17 @@ bool MfccMelFilterbank::Initialize(int input_length, double input_sample_rate, // An extra center frequency is computed at the top to get the upper // limit on the high side of the final triangular filter. - center_frequencies_.resize(num_channels_ + 1); + std::size_t center_frequencies_size = std::size_t(num_channels_) + 1; + if (center_frequencies_size >= std::numeric_limits::max() || + center_frequencies_size > center_frequencies_.max_size()) { + LOG(ERROR) << "Number of filterbank channels must be less than " + << std::numeric_limits::max() + << " and less than or equal to " + << center_frequencies_.max_size(); + return false; + } + center_frequencies_.resize(center_frequencies_size); + const double mel_low = FreqToMel(lower_frequency_limit); const double mel_hi = FreqToMel(upper_frequency_limit); const double mel_span = mel_hi - mel_low; diff --git a/tensorflow/core/kernels/mfcc_mel_filterbank_test.cc b/tensorflow/core/kernels/mfcc_mel_filterbank_test.cc index 54f31e1699ef18..26b5afed135051 100644 --- a/tensorflow/core/kernels/mfcc_mel_filterbank_test.cc +++ b/tensorflow/core/kernels/mfcc_mel_filterbank_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/kernels/mfcc_mel_filterbank.h" +#include #include #include "tensorflow/core/platform/test.h" @@ -85,4 +86,37 @@ TEST(MfccMelFilterbankTest, IgnoresExistingContentOfOutputVector) { } } +TEST(MfccMelFilterbankTest, FailsWhenChannelsGreaterThanMaxIntValue) { + // Test for bug where vector throws a length_error when it suspects the size + // to be more than it's max_size. For now, we fail initialization when the + // number of requested channels is >= the maximum value int can take (since + // num_channels_ is an int). + MfccMelFilterbank filterbank; + + const int kSampleCount = 513; + std::size_t num_channels = std::numeric_limits::max(); + bool initialized = filterbank.Initialize( + kSampleCount, 2 /* sample rate */, num_channels /* channels */, + 1.0 /* lower frequency limit */, 5.0 /* upper frequency limit */); + + EXPECT_FALSE(initialized); +} + +TEST(MfccMelFilterbankTest, FailsWhenChannelsGreaterThanMaxSize) { + // Test for bug where vector throws a length_error when it suspects the size + // to be more than it's max_size. 
For now, we fail initialization when the + // number of requested channels is > than std::vector::max_size(). + MfccMelFilterbank filterbank; + + const int kSampleCount = 513; + // Set num_channels to exceed the max_size a double vector can + // theoretically take. + std::size_t num_channels = std::vector().max_size() + 1; + bool initialized = filterbank.Initialize( + kSampleCount, 2 /* sample rate */, num_channels /* channels */, + 1.0 /* lower frequency limit */, 5.0 /* upper frequency limit */); + + EXPECT_FALSE(initialized); +} + } // namespace tensorflow diff --git a/tensorflow/core/kernels/mfcc_op.cc b/tensorflow/core/kernels/mfcc_op.cc index 358a420c1606ab..2c5f9560aaa31c 100644 --- a/tensorflow/core/kernels/mfcc_op.cc +++ b/tensorflow/core/kernels/mfcc_op.cc @@ -25,7 +25,7 @@ limitations under the License. namespace tensorflow { -// Create a speech fingerpring from spectrogram data. +// Create a speech fingerprint from spectrogram data. class MfccOp : public OpKernel { public: explicit MfccOp(OpKernelConstruction* context) : OpKernel(context) { @@ -60,10 +60,12 @@ class MfccOp : public OpKernel { mfcc.set_lower_frequency_limit(lower_frequency_limit_); mfcc.set_filterbank_channel_count(filterbank_channel_count_); mfcc.set_dct_coefficient_count(dct_coefficient_count_); - OP_REQUIRES(context, mfcc.Initialize(spectrogram_channels, sample_rate), - errors::InvalidArgument( - "Mfcc initialization failed for channel count ", - spectrogram_channels, " and sample rate ", sample_rate)); + OP_REQUIRES( + context, mfcc.Initialize(spectrogram_channels, sample_rate), + errors::InvalidArgument("Mfcc initialization failed for channel count ", + spectrogram_channels, ", sample rate ", + sample_rate, " and filterbank_channel_count ", + filterbank_channel_count_)); Tensor* output_tensor = nullptr; OP_REQUIRES_OK(context, diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc index 24b7e3f4ebdd5a..a007d37c4e290a 100644 --- a/tensorflow/core/kernels/parameterized_truncated_normal_op.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op.cc @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_util.h" #include "tensorflow/core/kernels/stateless_random_ops.h" #include "tensorflow/core/lib/random/random_distributions.h" #include "tensorflow/core/platform/logging.h" @@ -630,20 +631,18 @@ class ParameterizedTruncatedNormalOp : public OpKernel { OP_REQUIRES(ctx, shape_tensor.NumElements() > 0, errors::InvalidArgument("Shape tensor must not be empty, got ", shape_tensor.DebugString())); - int32_t num_batches = shape_tensor.flat()(0); + TensorShape tensor_shape; + OP_REQUIRES_OK(ctx, tensor::MakeShape(shape_tensor, &tensor_shape)); + int32_t num_batches = tensor_shape.dim_size(0); int32_t samples_per_batch = 1; - const int32_t num_dims = shape_tensor.dim_size(0); + const int32_t num_dims = tensor_shape.dims(); for (int32_t i = 1; i < num_dims; i++) { - samples_per_batch *= shape_tensor.flat()(i); + samples_per_batch *= tensor_shape.dim_size(i); } const int32_t num_elements = num_batches * samples_per_batch; // Allocate the output before fudging num_batches and samples_per_batch. 
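Editor's note: the mel-filterbank hunk above guards the `center_frequencies_` resize: the channel count is widened before the +1, then checked against both the int range and std::vector::max_size() so an oversized request fails cleanly instead of throwing std::length_error. A standalone sketch of that guard (not the TensorFlow class):

#include <cstdio>
#include <limits>
#include <vector>

// Widen the channel count, add the extra top-of-band entry, and refuse to
// resize if the result would not fit in an int or in the vector itself.
bool ResizeCenterFrequencies(long long num_channels,
                             std::vector<double>* center_frequencies) {
  if (num_channels < 1) {
    std::fprintf(stderr, "need at least one filterbank channel\n");
    return false;
  }
  const std::size_t wanted = static_cast<std::size_t>(num_channels) + 1;
  if (wanted >= static_cast<std::size_t>(std::numeric_limits<int>::max()) ||
      wanted > center_frequencies->max_size()) {
    std::fprintf(stderr, "channel count %lld is too large\n", num_channels);
    return false;
  }
  center_frequencies->resize(wanted);
  return true;
}

int main() {
  std::vector<double> freqs;
  // Mirrors the new unit tests: INT_MAX channels is rejected up front.
  bool ok = ResizeCenterFrequencies(std::numeric_limits<int>::max(), &freqs);
  std::printf("initialized: %s\n", ok ? "yes" : "no");
}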
- auto shape_vec = shape_tensor.flat(); - TensorShape tensor_shape; - OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape( - shape_vec.data(), shape_vec.size(), &tensor_shape)); Tensor* samples_tensor; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, tensor_shape, &samples_tensor)); diff --git a/tensorflow/core/kernels/pooling_ops_3d.cc b/tensorflow/core/kernels/pooling_ops_3d.cc index 57f4ac80ecab75..60f1718f240e2a 100644 --- a/tensorflow/core/kernels/pooling_ops_3d.cc +++ b/tensorflow/core/kernels/pooling_ops_3d.cc @@ -523,7 +523,7 @@ class AvgPooling3dGradOp : public OpKernel { TensorShape output_shape; auto shape_vec = tensor_in_shape.vec(); for (int64_t i = 0; i < tensor_in_shape.NumElements(); ++i) { - output_shape.AddDim(shape_vec(i)); + OP_REQUIRES_OK(context, output_shape.AddDimWithStatus(shape_vec(i))); } Tensor* output; diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.cc b/tensorflow/core/kernels/quantize_and_dequantize_op.cc index da9257fb9c9af1..ae02b57861ac02 100644 --- a/tensorflow/core/kernels/quantize_and_dequantize_op.cc +++ b/tensorflow/core/kernels/quantize_and_dequantize_op.cc @@ -21,19 +21,23 @@ limitations under the License. #define EIGEN_USE_GPU #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM -#include "tensorflow/core/kernels/quantize_and_dequantize_op.h" - #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/type_traits.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/quantize_and_dequantize_op.h" #include "tensorflow/core/lib/core/errors.h" namespace tensorflow { +namespace { -typedef Eigen::ThreadPoolDevice CPUDevice; -typedef Eigen::GpuDevice GPUDevice; +using CpuDevice = ::Eigen::ThreadPoolDevice; +using GpuDevice = ::Eigen::GpuDevice; +using ::tensorflow::errors::InvalidArgument; + +} // namespace // Simulate quantization precision loss in a float tensor by: // 1. Quantize the tensor to fixed point numbers, which should match the target @@ -49,8 +53,8 @@ class QuantizeAndDequantizeV2Op : public OpKernel { OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &axis_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("num_bits", &num_bits_)); OP_REQUIRES(ctx, num_bits_ > 0 && num_bits_ < (signed_input_ ? 62 : 63), - errors::InvalidArgument("num_bits is out of range: ", num_bits_, - " with signed_input_ ", signed_input_)); + InvalidArgument("num_bits is out of range: ", num_bits_, + " with signed_input_ ", signed_input_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("range_given", &range_given_)); string round_mode_string; @@ -58,10 +62,10 @@ class QuantizeAndDequantizeV2Op : public OpKernel { OP_REQUIRES( ctx, (round_mode_string == "HALF_UP" || round_mode_string == "HALF_TO_EVEN"), - errors::InvalidArgument("Round mode string must be " - "'HALF_UP' or " - "'HALF_TO_EVEN', is '" + - round_mode_string + "'")); + InvalidArgument("Round mode string must be " + "'HALF_UP' or " + "'HALF_TO_EVEN', is '" + + round_mode_string + "'")); if (round_mode_string == "HALF_UP") { round_mode_ = ROUND_HALF_UP; } else if (round_mode_string == "HALF_TO_EVEN") { @@ -72,12 +76,10 @@ class QuantizeAndDequantizeV2Op : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); - OP_REQUIRES( - ctx, axis_ >= -1, - errors::InvalidArgument("Axis must be at least -1. 
Found ", axis_)); - OP_REQUIRES( - ctx, (axis_ == -1 || axis_ < input.shape().dims()), - errors::InvalidArgument("Shape must be at least rank ", axis_ + 1, + OP_REQUIRES(ctx, axis_ >= -1, + InvalidArgument("Axis must be at least -1. Found ", axis_)); + OP_REQUIRES(ctx, (axis_ == -1 || axis_ < input.shape().dims()), + InvalidArgument("Shape must be at least rank ", axis_ + 1, " but is rank ", input.shape().dims())); const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_); Tensor input_min_tensor; @@ -91,21 +93,21 @@ class QuantizeAndDequantizeV2Op : public OpKernel { auto min_val = input_min_tensor.scalar()(); auto max_val = input_max_tensor.scalar()(); OP_REQUIRES(ctx, min_val <= max_val, - errors::InvalidArgument("Invalid range: input_min ", - min_val, " > input_max ", max_val)); + InvalidArgument("Invalid range: input_min ", min_val, + " > input_max ", max_val)); } else { - OP_REQUIRES(ctx, input_min_tensor.dim_size(0) == depth, - errors::InvalidArgument( - "input_min_tensor has incorrect size, was ", - input_min_tensor.dim_size(0), " expected ", depth, - " to match dim ", axis_, " of the input ", - input_min_tensor.shape())); - OP_REQUIRES(ctx, input_max_tensor.dim_size(0) == depth, - errors::InvalidArgument( - "input_max_tensor has incorrect size, was ", - input_max_tensor.dim_size(0), " expected ", depth, - " to match dim ", axis_, " of the input ", - input_max_tensor.shape())); + OP_REQUIRES( + ctx, input_min_tensor.dim_size(0) == depth, + InvalidArgument("input_min_tensor has incorrect size, was ", + input_min_tensor.dim_size(0), " expected ", depth, + " to match dim ", axis_, " of the input ", + input_min_tensor.shape())); + OP_REQUIRES( + ctx, input_max_tensor.dim_size(0) == depth, + InvalidArgument("input_max_tensor has incorrect size, was ", + input_max_tensor.dim_size(0), " expected ", depth, + " to match dim ", axis_, " of the input ", + input_max_tensor.shape())); } } else { auto range_shape = (axis_ == -1) ? TensorShape({}) : TensorShape({depth}); @@ -158,38 +160,34 @@ class QuantizeAndDequantizeV4GradientOp : public OpKernel { Tensor* input_backprop = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &input_backprop)); - OP_REQUIRES( - ctx, axis_ >= -1, - errors::InvalidArgument("Axis must be at least -1. Found ", axis_)); + OP_REQUIRES(ctx, axis_ >= -1, + InvalidArgument("Axis must be at least -1. Found ", axis_)); OP_REQUIRES(ctx, (axis_ == -1 || axis_ < input.shape().dims()), - errors::InvalidArgument( + InvalidArgument( "Axis should be -1 or 0 or a positive value less than ", input.shape().dims(), "but given axis value was ", axis_)); - OP_REQUIRES( - ctx, input.IsSameSize(gradient), - errors::InvalidArgument("gradient and input must be the same size")); + OP_REQUIRES(ctx, input.IsSameSize(gradient), + InvalidArgument("gradient and input must be the same size")); const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_); const Tensor& input_min_tensor = ctx->input(2); OP_REQUIRES(ctx, input_min_tensor.dims() == 0 || input_min_tensor.dims() == 1, - errors::InvalidArgument( + InvalidArgument( "Input min tensor must have dimension 0 or 1. Received ", input_min_tensor.dims(), ".")); const Tensor& input_max_tensor = ctx->input(3); OP_REQUIRES(ctx, input_max_tensor.dims() == 0 || input_max_tensor.dims() == 1, - errors::InvalidArgument( + InvalidArgument( "Input max tensor must have dimension 0 or 1. 
Received ", input_max_tensor.dims(), ".")); if (axis_ != -1) { - OP_REQUIRES( - ctx, input_min_tensor.dim_size(0) == depth, - errors::InvalidArgument("min has incorrect size, expected ", depth, + OP_REQUIRES(ctx, input_min_tensor.dim_size(0) == depth, + InvalidArgument("min has incorrect size, expected ", depth, " was ", input_min_tensor.dim_size(0))); - OP_REQUIRES( - ctx, input_max_tensor.dim_size(0) == depth, - errors::InvalidArgument("max has incorrect size, expected ", depth, + OP_REQUIRES(ctx, input_max_tensor.dim_size(0) == depth, + InvalidArgument("max has incorrect size, expected ", depth, " was ", input_max_tensor.dim_size(0))); } @@ -203,12 +201,12 @@ class QuantizeAndDequantizeV4GradientOp : public OpKernel { ctx->allocate_output(2, min_max_shape, &input_max_backprop)); if (axis_ == -1) { - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(input_min_tensor.shape()), - errors::InvalidArgument( - "input_min must be a scalar if axis is unspecified")); - OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(input_max_tensor.shape()), - errors::InvalidArgument( - "input_max must be a scalar if axis is unspecified")); + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_min_tensor.shape()), + InvalidArgument("input_min must be a scalar if axis is unspecified")); + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_max_tensor.shape()), + InvalidArgument("input_max must be a scalar if axis is unspecified")); functor::QuantizeAndDequantizeOneScaleGradientFunctor f; f(ctx->eigen_device(), gradient.template flat(), input.template flat(), input_min_tensor.scalar(), @@ -252,21 +250,25 @@ class QuantizeAndDequantizeV3Op : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); OP_REQUIRES(ctx, axis_ < input.dims(), - errors::InvalidArgument( + InvalidArgument( "Axis requested is larger than input dimensions. Axis: ", axis_, " Input Dimensions: ", input.dims())); const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); - Tensor num_bits_tensor; - num_bits_tensor = ctx->input(3); - int num_bits_val = num_bits_tensor.scalar()(); + // Get num_bits and validate. + const Tensor num_bits_tensor = ctx->input(3); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(num_bits_tensor.shape()), + InvalidArgument("Invalid shape. The `num_bits` tensor should " + "be a scalar. Got dimensions: ", + num_bits_tensor.dims())); - OP_REQUIRES( - ctx, num_bits_val > 0 && num_bits_val < (signed_input_ ? 62 : 63), - errors::InvalidArgument("num_bits is out of range: ", num_bits_val, - " with signed_input_ ", signed_input_)); + const int num_bits_val = num_bits_tensor.scalar()(); + OP_REQUIRES(ctx, + num_bits_val > 0 && num_bits_val < (signed_input_ ? 
62 : 63), + InvalidArgument("num_bits is out of range: ", num_bits_val, + " with `signed_input_` ", signed_input_)); Tensor input_min_tensor; Tensor input_max_tensor; @@ -274,24 +276,24 @@ class QuantizeAndDequantizeV3Op : public OpKernel { input_min_tensor = ctx->input(1); input_max_tensor = ctx->input(2); if (axis_ == -1) { - auto min_val = input_min_tensor.scalar()(); - auto max_val = input_max_tensor.scalar()(); + const auto min_val = input_min_tensor.scalar()(); + const auto max_val = input_max_tensor.scalar()(); OP_REQUIRES(ctx, min_val <= max_val, - errors::InvalidArgument("Invalid range: input_min ", - min_val, " > input_max ", max_val)); + InvalidArgument("Invalid range: input_min ", min_val, + " > input_max ", max_val)); } else { - OP_REQUIRES(ctx, input_min_tensor.dim_size(0) == depth, - errors::InvalidArgument( - "input_min_tensor has incorrect size, was ", - input_min_tensor.dim_size(0), " expected ", depth, - " to match dim ", axis_, " of the input ", - input_min_tensor.shape())); - OP_REQUIRES(ctx, input_max_tensor.dim_size(0) == depth, - errors::InvalidArgument( - "input_max_tensor has incorrect size, was ", - input_max_tensor.dim_size(0), " expected ", depth, - " to match dim ", axis_, " of the input ", - input_max_tensor.shape())); + OP_REQUIRES( + ctx, input_min_tensor.dim_size(0) == depth, + InvalidArgument("input_min_tensor has incorrect size, was ", + input_min_tensor.dim_size(0), " expected ", depth, + " to match dim ", axis_, " of the input ", + input_min_tensor.shape())); + OP_REQUIRES( + ctx, input_max_tensor.dim_size(0) == depth, + InvalidArgument("input_max_tensor has incorrect size, was ", + input_max_tensor.dim_size(0), " expected ", depth, + " to match dim ", axis_, " of the input ", + input_max_tensor.shape())); } } else { auto range_shape = (axis_ == -1) ? TensorShape({}) : TensorShape({depth}); @@ -331,15 +333,14 @@ class QuantizeAndDequantizeOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->GetAttr("signed_input", &signed_input_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("num_bits", &num_bits_)); OP_REQUIRES(ctx, num_bits_ > 0 && num_bits_ < (signed_input_ ? 62 : 63), - errors::InvalidArgument("num_bits is out of range: ", num_bits_, - " with signed_input_ ", signed_input_)); + InvalidArgument("num_bits is out of range: ", num_bits_, + " with signed_input_ ", signed_input_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("range_given", &range_given_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("input_min", &input_min_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("input_max", &input_max_)); if (range_given_) { - OP_REQUIRES( - ctx, input_min_ <= input_max_, - errors::InvalidArgument("Invalid range: input_min ", input_min_, + OP_REQUIRES(ctx, input_min_ <= input_max_, + InvalidArgument("Invalid range: input_min ", input_min_, " > input_max ", input_max_)); } } @@ -371,53 +372,53 @@ class QuantizeAndDequantizeOp : public OpKernel { float input_max_; }; -// Specializations for CPUDevice. +// Specializations for CpuDevice. 
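Editor's note: the QuantizeAndDequantize* kernels above repeat the same attribute validation: `axis` must be -1 or a valid dimension index, and `num_bits` must lie strictly between 0 and 62 (signed) or 63 (unsigned). A condensed standalone version follows, with illustrative names; the comment about 64-bit headroom is an assumption about the bound's rationale.

#include <cstdio>

// Condensed validation of the quantization attributes: axis -1 means "whole
// tensor", any other axis must index an existing dimension, and num_bits
// must stay inside the allowed fixed-point range.
bool ValidateQuantizeAttrs(int axis, int input_rank, int num_bits,
                           bool signed_input) {
  if (axis < -1) {
    std::fprintf(stderr, "axis must be at least -1, found %d\n", axis);
    return false;
  }
  if (axis != -1 && axis >= input_rank) {
    std::fprintf(stderr, "shape must be at least rank %d but is rank %d\n",
                 axis + 1, input_rank);
    return false;
  }
  // 62/63 presumably leaves headroom so the derived range still fits in a
  // signed 64-bit integer.
  const int max_bits = signed_input ? 62 : 63;
  if (num_bits <= 0 || num_bits >= max_bits) {
    std::fprintf(stderr, "num_bits is out of range: %d\n", num_bits);
    return false;
  }
  return true;
}

int main() {
  // axis 3 on a rank-2 input is rejected before any dim_size(axis) lookup.
  std::printf("%s\n", ValidateQuantizeAttrs(3, 2, 8, true) ? "ok" : "invalid");
}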
namespace functor { template -struct QuantizeAndDequantizeOneScaleFunctor { - void operator()(const CPUDevice& d, typename TTypes::ConstVec input, +struct QuantizeAndDequantizeOneScaleFunctor { + void operator()(const CpuDevice& d, typename TTypes::ConstVec input, const bool signed_input, const int num_bits, const bool range_given, Tensor* input_min_tensor, Tensor* input_max_tensor, QuantizerRoundMode round_mode, bool narrow_range, typename TTypes::Vec out) { - QuantizeAndDequantizeOneScaleImpl::Compute( + QuantizeAndDequantizeOneScaleImpl::Compute( d, input, signed_input, num_bits, range_given, input_min_tensor, input_max_tensor, round_mode, narrow_range, out); } }; template -struct QuantizeAndDequantizePerChannelFunctor { - void operator()(const CPUDevice& d, typename TTypes::ConstTensor input, +struct QuantizeAndDequantizePerChannelFunctor { + void operator()(const CpuDevice& d, typename TTypes::ConstTensor input, bool signed_input, int num_bits, bool range_given, Tensor* input_min_tensor, Tensor* input_max_tensor, QuantizerRoundMode round_mode, bool narrow_range, typename TTypes::Tensor out) { - QuantizeAndDequantizePerChannelImpl::Compute( + QuantizeAndDequantizePerChannelImpl::Compute( d, input, signed_input, num_bits, range_given, input_min_tensor, input_max_tensor, round_mode, narrow_range, out); } }; template -struct QuantizeAndDequantizeOneScaleGradientFunctor { - void operator()(const CPUDevice& d, typename TTypes::ConstFlat gradient, +struct QuantizeAndDequantizeOneScaleGradientFunctor { + void operator()(const CpuDevice& d, typename TTypes::ConstFlat gradient, typename TTypes::ConstFlat input, typename TTypes::ConstScalar input_min_tensor, typename TTypes::ConstScalar input_max_tensor, typename TTypes::Flat input_backprop, typename TTypes::Scalar input_min_backprop, typename TTypes::Scalar input_max_backprop) { - QuantizeAndDequantizeOneScaleGradientImpl::Compute( + QuantizeAndDequantizeOneScaleGradientImpl::Compute( d, gradient, input, input_min_tensor, input_max_tensor, input_backprop, input_min_backprop, input_max_backprop); } }; template -struct QuantizeAndDequantizePerChannelGradientFunctor { - void operator()(const CPUDevice& d, +struct QuantizeAndDequantizePerChannelGradientFunctor { + void operator()(const CpuDevice& d, typename TTypes::ConstTensor gradient, typename TTypes::ConstTensor input, const Tensor* input_min_tensor, @@ -425,16 +426,16 @@ struct QuantizeAndDequantizePerChannelGradientFunctor { typename TTypes::Tensor input_backprop, typename TTypes::Flat input_min_backprop, typename TTypes::Flat input_max_backprop) { - QuantizeAndDequantizePerChannelGradientImpl::Compute( + QuantizeAndDequantizePerChannelGradientImpl::Compute( d, gradient, input, input_min_tensor, input_max_tensor, input_backprop, input_min_backprop, input_max_backprop); } }; -template struct functor::QuantizeAndDequantizeOneScaleGradientFunctor; template struct functor::QuantizeAndDequantizePerChannelGradientFunctor< - CPUDevice, double>; + CpuDevice, double>; } // namespace functor @@ -442,22 +443,22 @@ template struct functor::QuantizeAndDequantizePerChannelGradientFunctor< REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV2") \ .Device(DEVICE_CPU) \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV2Op); \ + QuantizeAndDequantizeV2Op); \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV3") \ .Device(DEVICE_CPU) \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV3Op); \ + QuantizeAndDequantizeV3Op); \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV4") \ .Device(DEVICE_CPU) \ 
.TypeConstraint("T"), \ - QuantizeAndDequantizeV2Op); \ + QuantizeAndDequantizeV2Op); \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV4Grad") \ .Device(DEVICE_CPU) \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV4GradientOp); \ + QuantizeAndDequantizeV4GradientOp); \ REGISTER_KERNEL_BUILDER( \ Name("QuantizeAndDequantize").Device(DEVICE_CPU).TypeConstraint("T"), \ - QuantizeAndDequantizeOp); + QuantizeAndDequantizeOp); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); #undef REGISTER_CPU_KERNEL @@ -470,29 +471,29 @@ TF_CALL_double(REGISTER_CPU_KERNEL); .HostMemory("input_min") \ .HostMemory("input_max") \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV2Op); \ + QuantizeAndDequantizeV2Op); \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV3") \ .Device(DEVICE_GPU) \ .HostMemory("input_min") \ .HostMemory("input_max") \ .HostMemory("num_bits") \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV3Op); \ + QuantizeAndDequantizeV3Op); \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV4") \ .Device(DEVICE_GPU) \ .HostMemory("input_min") \ .HostMemory("input_max") \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV2Op); \ + QuantizeAndDequantizeV2Op); \ REGISTER_KERNEL_BUILDER(Name("QuantizeAndDequantizeV4Grad") \ .Device(DEVICE_GPU) \ .HostMemory("input_min") \ .HostMemory("input_max") \ .TypeConstraint("T"), \ - QuantizeAndDequantizeV4GradientOp); \ + QuantizeAndDequantizeV4GradientOp); \ REGISTER_KERNEL_BUILDER( \ Name("QuantizeAndDequantize").Device(DEVICE_GPU).TypeConstraint("T"), \ - QuantizeAndDequantizeOp); + QuantizeAndDequantizeOp); TF_CALL_float(REGISTER_GPU_KERNEL); TF_CALL_double(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc index 1b948c8108de87..83f8996b4cc746 100644 --- a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc +++ b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc @@ -40,8 +40,20 @@ class QuantizeDownAndShrinkRangeOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); - const float input_min_float = ctx->input(1).flat()(0); - const float input_max_float = ctx->input(2).flat()(0); + const Tensor& input_min = ctx->input(1); + const Tensor& input_max = ctx->input(2); + + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_min.shape()), + errors::InvalidArgument("`input_min` must be rank 0 but is rank ", + input_min.dims())); + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_max.shape()), + errors::InvalidArgument("`input_max` must be rank 0 but is rank ", + input_max.dims())); + + const float input_min_float = input_min.scalar()(); + const float input_max_float = input_max.scalar()(); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); Tensor* output_min = nullptr; diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc b/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc index 583ba1fded79e8..f3d777e8e0e3c5 100644 --- a/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc +++ b/tensorflow/core/kernels/quantize_down_and_shrink_range_op_test.cc @@ -53,8 +53,8 @@ TEST_F(QuantizeDownAndShrinkRangeTest, HandCrafted) { const int value_count = 3; AddInputFromArray(TensorShape({value_count}), {-(1 << 23), 0, (1 << 23)}); - AddInputFromArray(TensorShape({1}), {-256.0f}); - AddInputFromArray(TensorShape({1}), {256.0f}); + 
AddInputFromArray(TensorShape({}), {-256.0f}); + AddInputFromArray(TensorShape({}), {256.0f}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_QUINT8, TensorShape({value_count})); test::FillValues(&expected, {0, 128, 255}); diff --git a/tensorflow/core/kernels/quantized_activation_ops.cc b/tensorflow/core/kernels/quantized_activation_ops.cc index 2896c3d45a7023..36d321a8e17138 100644 --- a/tensorflow/core/kernels/quantized_activation_ops.cc +++ b/tensorflow/core/kernels/quantized_activation_ops.cc @@ -32,8 +32,21 @@ class QuantizedReluOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); - const float min_input = context->input(1).flat()(0); - const float max_input = context->input(2).flat()(0); + const Tensor& min_input_tensor = context->input(1); + const Tensor& max_input_tensor = context->input(2); + + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(min_input_tensor.shape()), + errors::InvalidArgument("`min_input` must be rank 0 but is rank ", + min_input_tensor.dims())); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(max_input_tensor.shape()), + errors::InvalidArgument("`max_input` must be rank 0 but is rank ", + max_input_tensor.dims())); + + const float min_input = min_input_tensor.scalar()(); + const float max_input = max_input_tensor.scalar()(); + Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); @@ -65,8 +78,21 @@ class QuantizedRelu6Op : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); - const float min_input = context->input(1).flat()(0); - const float max_input = context->input(2).flat()(0); + const Tensor& min_input_tensor = context->input(1); + const Tensor& max_input_tensor = context->input(2); + + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(min_input_tensor.shape()), + errors::InvalidArgument("`min_input` must be rank 0 but is rank ", + min_input_tensor.dims())); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(max_input_tensor.shape()), + errors::InvalidArgument("`max_input` must be rank 0 but is rank ", + max_input_tensor.dims())); + + const float min_input = min_input_tensor.scalar()(); + const float max_input = max_input_tensor.scalar()(); + Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); diff --git a/tensorflow/core/kernels/quantized_activation_ops_test.cc b/tensorflow/core/kernels/quantized_activation_ops_test.cc index b3b7cb58b9a455..34c5130f4759b5 100644 --- a/tensorflow/core/kernels/quantized_activation_ops_test.cc +++ b/tensorflow/core/kernels/quantized_activation_ops_test.cc @@ -55,8 +55,8 @@ TEST_F(QuantizedActivationsTest, TestRelu) { AddInputFromArray(input_quantized.shape(), input_quantized.flat()); - AddInputFromArray(TensorShape({1}), {input_min}); - AddInputFromArray(TensorShape({1}), {input_max}); + AddInputFromArray(TensorShape({}), {input_min}); + AddInputFromArray(TensorShape({}), {input_max}); TF_ASSERT_OK(RunOpKernel()); const Tensor& output_quantized = *GetOutput(0); const float output_min = GetOutput(1)->flat()(0); @@ -86,8 +86,8 @@ TEST_F(QuantizedActivationsTest, TestRelu6) { AddInputFromArray(input_quantized.shape(), input_quantized.flat()); - AddInputFromArray(TensorShape({1}), {input_min}); - AddInputFromArray(TensorShape({1}), {input_max}); + AddInputFromArray(TensorShape({}), {input_min}); + AddInputFromArray(TensorShape({}), {input_max}); TF_ASSERT_OK(RunOpKernel()); const Tensor& 
output_quantized = *GetOutput(0); const float output_min = GetOutput(1)->flat()(0); diff --git a/tensorflow/core/kernels/quantized_add_op.cc b/tensorflow/core/kernels/quantized_add_op.cc index 1f9897b9b61cd7..5cf7ed1456034e 100644 --- a/tensorflow/core/kernels/quantized_add_op.cc +++ b/tensorflow/core/kernels/quantized_add_op.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/lib/core/errors.h" @@ -457,10 +458,28 @@ class QuantizedAddOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& x = context->input(0); const Tensor& y = context->input(1); - const float min_x = context->input(2).flat()(0); - const float max_x = context->input(3).flat()(0); - const float min_y = context->input(4).flat()(0); - const float max_y = context->input(5).flat()(0); + const Tensor& min_x_tensor = context->input(2); + const Tensor& max_x_tensor = context->input(3); + const Tensor& min_y_tensor = context->input(4); + const Tensor& max_y_tensor = context->input(5); + + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_x_tensor.shape()), + errors::InvalidArgument("`min_x` must be rank 0 but is rank ", + min_x_tensor.dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_x_tensor.shape()), + errors::InvalidArgument("`max_x` must be rank 0 but is rank ", + max_x_tensor.dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_y_tensor.shape()), + errors::InvalidArgument("`min_y` must be rank 0 but is rank ", + min_y_tensor.dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_y_tensor.shape()), + errors::InvalidArgument("`max_y` must be rank 0 but is rank ", + max_y_tensor.dims())); + + const float min_x = min_x_tensor.scalar()(); + const float max_x = max_x_tensor.scalar()(); + const float min_y = min_y_tensor.scalar()(); + const float max_y = max_y_tensor.scalar()(); BCast bcast(BCast::FromShape(x.shape()), BCast::FromShape(y.shape())); if (!bcast.IsValid()) { diff --git a/tensorflow/core/kernels/quantized_bias_add_op.cc b/tensorflow/core/kernels/quantized_bias_add_op.cc index db0e21a498011d..c064f9b1b21e25 100644 --- a/tensorflow/core/kernels/quantized_bias_add_op.cc +++ b/tensorflow/core/kernels/quantized_bias_add_op.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/kernels/quantization_utils.h" @@ -38,10 +39,30 @@ class QuantizedBiasAddOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); const Tensor& bias = context->input(1); - const float input_min = context->input(2).flat()(0); - const float input_max = context->input(3).flat()(0); - const float bias_min = context->input(4).flat()(0); - const float bias_max = context->input(5).flat()(0); + + const Tensor& min_input = context->input(2); + const Tensor& max_input = context->input(3); + const Tensor& min_bias = context->input(4); + const Tensor& max_bias = context->input(5); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(min_input.shape()), + errors::InvalidArgument("`min_input` must be rank 0 but is rank ", + min_input.dims())); + OP_REQUIRES( + context, TensorShapeUtils::IsScalar(max_input.shape()), + errors::InvalidArgument("`max_input` must be rank 0 but is rank ", + max_input.dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_bias.shape()), + errors::InvalidArgument( + "`min_bias` must be rank 0 but is rank ", min_bias.dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_bias.shape()), + errors::InvalidArgument( + "`max_bias` must be rank 0 but is rank ", max_bias.dims())); + + const float input_min = min_input.flat()(0); + const float input_max = max_input.flat()(0); + const float bias_min = min_bias.flat()(0); + const float bias_max = max_bias.flat()(0); OP_REQUIRES(context, TensorShapeUtils::IsMatrixOrHigher(input.shape()), errors::InvalidArgument("Input tensor must be at least 2D: ", diff --git a/tensorflow/core/kernels/quantized_bias_add_op_test.cc b/tensorflow/core/kernels/quantized_bias_add_op_test.cc index 7b99ceafe261b7..edfae98efa953b 100644 --- a/tensorflow/core/kernels/quantized_bias_add_op_test.cc +++ b/tensorflow/core/kernels/quantized_bias_add_op_test.cc @@ -74,10 +74,10 @@ TEST_F(QuantizedBiasAddTest, Small) { input_quantized.flat()); AddInputFromArray(bias_quantized.shape(), bias_quantized.flat()); - AddInputFromArray(TensorShape({1}), {input_min}); - AddInputFromArray(TensorShape({1}), {input_max}); - AddInputFromArray(TensorShape({1}), {bias_min}); - AddInputFromArray(TensorShape({1}), {bias_max}); + AddInputFromArray(TensorShape({}), {input_min}); + AddInputFromArray(TensorShape({}), {input_max}); + AddInputFromArray(TensorShape({}), {bias_min}); + AddInputFromArray(TensorShape({}), {bias_max}); TF_ASSERT_OK(RunOpKernel()); const Tensor& output_quantized = *GetOutput(0); const float output_min = GetOutput(1)->flat()(0); @@ -156,10 +156,10 @@ TEST_F(QuantizedBiasAddTest, RealData) { input_quantized.flat()); AddInputFromArray(bias_quantized.shape(), bias_quantized.flat()); - AddInputFromArray(TensorShape({1}), {input_min}); - AddInputFromArray(TensorShape({1}), {input_max}); - AddInputFromArray(TensorShape({1}), {bias_min}); - AddInputFromArray(TensorShape({1}), {bias_max}); + AddInputFromArray(TensorShape({}), {input_min}); + AddInputFromArray(TensorShape({}), {input_max}); + AddInputFromArray(TensorShape({}), {bias_min}); + AddInputFromArray(TensorShape({}), {bias_max}); TF_ASSERT_OK(RunOpKernel()); const Tensor& output_quantized = *GetOutput(0); const float output_min = 
GetOutput(1)->flat<float>()(0); diff --git a/tensorflow/core/kernels/quantized_instance_norm.cc b/tensorflow/core/kernels/quantized_instance_norm.cc index 4fd967fe3fc46d..405c6aa95bb3e3 100644 --- a/tensorflow/core/kernels/quantized_instance_norm.cc +++ b/tensorflow/core/kernels/quantized_instance_norm.cc @@ -25,7 +25,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" - +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/quantization_utils.h" #ifdef USE_NEON @@ -274,8 +274,16 @@ class QuantizedInstanceNorm : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); - float input_min = context->input(1).flat<float>()(0); - float input_max = context->input(2).flat<float>()(0); + const Tensor& x_min = context->input(1); + const Tensor& x_max = context->input(2); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(x_min.shape()), + errors::InvalidArgument("`x_min` must be rank 0 but is rank ", + x_min.dims())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(x_max.shape()), + errors::InvalidArgument("`x_max` must be rank 0 but is rank ", + x_max.dims())); + float input_min = x_min.scalar<float>()(); + float input_max = x_max.scalar<float>()(); float input_scale = (input_max - input_min) / 255.0f; OP_REQUIRES(context, input_min < input_max, diff --git a/tensorflow/core/kernels/quantized_matmul_op.cc b/tensorflow/core/kernels/quantized_matmul_op.cc index 9d3b5279e4bb82..ae65dc3b5e38ce 100644 --- a/tensorflow/core/kernels/quantized_matmul_op.cc +++ b/tensorflow/core/kernels/quantized_matmul_op.cc @@ -20,11 +20,14 @@ limitations under the License. #define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK #include "public/gemmlowp.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/kernels/reference_gemm.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -75,9 +78,21 @@ class QuantizedMatMulOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& a = context->input(0); const Tensor& b = context->input(1); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(context->input(2).shape()), + errors::InvalidArgument("min_a must be a scalar, but got shape", + context->input(2).shape())); const float min_a = context->input(2).flat<float>()(0); + OP_REQUIRES(context, context->input(3).NumElements() == 1, + errors::InvalidArgument("max_a must be a scalar, but got shape", + context->input(3).shape())); const float max_a = context->input(3).flat<float>()(0); + OP_REQUIRES(context, context->input(4).NumElements() == 1, + errors::InvalidArgument("min_b must be a scalar, but got shape", + context->input(4).shape())); const float min_b = context->input(4).flat<float>()(0); + OP_REQUIRES(context, context->input(5).NumElements() == 1, + errors::InvalidArgument("max_b must be a scalar, but got shape", + context->input(5).shape())); const float max_b = context->input(5).flat<float>()(0); // Make sure that we have valid quantization ranges for the input buffers.
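Editorial note (not part of the patch): the kernel hunks above all apply the same hardening pattern, namely reject a non-scalar min/max tensor before dereferencing it. A minimal sketch of that pattern follows; the helper name GetScalarFloatInput is hypothetical, while TensorShapeUtils::IsScalar, errors::InvalidArgument, and Tensor::scalar are the existing TensorFlow APIs the patch itself uses.

#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"

namespace tensorflow {

// Reads a float out of `t` only after confirming `t` is rank 0, so a
// malformed (e.g. empty or multi-element) min/max input yields
// InvalidArgument instead of an out-of-bounds read.
Status GetScalarFloatInput(const Tensor& t, const char* name, float* value) {
  if (!TensorShapeUtils::IsScalar(t.shape())) {
    return errors::InvalidArgument("`", name, "` must be rank 0 but is rank ",
                                   t.dims());
  }
  *value = t.scalar<float>()();
  return Status::OK();
}

}  // namespace tensorflow

The kernels above inline this check with OP_REQUIRES rather than returning a Status, but the shape test and the scalar read are the same.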
diff --git a/tensorflow/core/kernels/quantized_matmul_op_test.cc b/tensorflow/core/kernels/quantized_matmul_op_test.cc index c9f05dbc10bb8b..f562a2ebcb744f 100644 --- a/tensorflow/core/kernels/quantized_matmul_op_test.cc +++ b/tensorflow/core/kernels/quantized_matmul_op_test.cc @@ -62,10 +62,10 @@ TEST_F(QuantizedMatMulTest, Small_NoParams) { // | 15 | 16 | 17 | 18 | AddInputFromArray(TensorShape({3, 4}), {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}); - AddInputFromArray(TensorShape({1}), {0}); - AddInputFromArray(TensorShape({1}), {255.0f}); - AddInputFromArray(TensorShape({1}), {0}); - AddInputFromArray(TensorShape({1}), {255.0f}); + AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {255.0f}); + AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {255.0f}); TF_ASSERT_OK(RunOpKernel()); // Here are the results we expect, from hand calculations: @@ -118,10 +118,10 @@ TEST_F(QuantizedMatMulTest, VerySmall_WithParams) { // The B matrix is: // | 1 | AddInputFromArray(TensorShape({b_rows, b_cols}), {0}); - AddInputFromArray(TensorShape({1}), {-12.0f}); - AddInputFromArray(TensorShape({1}), {243.0f}); - AddInputFromArray(TensorShape({1}), {1.0f}); - AddInputFromArray(TensorShape({1}), {256.0f}); + AddInputFromArray(TensorShape({}), {-12.0f}); + AddInputFromArray(TensorShape({}), {243.0f}); + AddInputFromArray(TensorShape({}), {1.0f}); + AddInputFromArray(TensorShape({}), {256.0f}); TF_ASSERT_OK(RunOpKernel()); // We're requesting C = A.transposed() * B, // so we expect to get these results: @@ -162,12 +162,50 @@ TEST_F(QuantizedMatMulTest, VerySmall_BadRange) { // The B matrix is: // | 1 | AddInputFromArray(TensorShape({b_rows, b_cols}), {0}); - AddInputFromArray(TensorShape({1}), {-12.0f}); - AddInputFromArray(TensorShape({1}), {243.0f}); + AddInputFromArray(TensorShape({}), {-12.0f}); + AddInputFromArray(TensorShape({}), {243.0f}); // Here we set the range so that the min and max are equal, so we expect to // see an error when we run. - AddInputFromArray(TensorShape({1}), {1.0f}); - AddInputFromArray(TensorShape({1}), {1.0f}); + AddInputFromArray(TensorShape({}), {1.0f}); + AddInputFromArray(TensorShape({}), {1.0f}); + EXPECT_EQ(::tensorflow::error::INVALID_ARGUMENT, RunOpKernel().code()); +} + +// This test multiplies two 1x1 8bit matrices, but sets invalid quantized min +// and max values, so we expect to get an error +TEST_F(QuantizedMatMulTest, VerySmall_BadMinMax) { + // These parameters reflect a typical production usage of eight-bit matmuls + // in an Inception-style network. + const bool transpose_a = true; + const int a_rows = 1; + const int a_cols = 1; + const int b_rows = 1; + const int b_cols = 1; + const bool transpose_b = false; + TF_ASSERT_OK(NodeDefBuilder("quantized_mat_mul_op", "QuantizedMatMul") + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_QUINT8)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Toutput", DataTypeToEnum::v()) + .Attr("transpose_a", transpose_a) + .Attr("transpose_b", transpose_b) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + // The A matrix is: + // | -1 | + AddInputFromArray(TensorShape({a_rows, a_cols}), {11}); + // The B matrix is: + // | 1 | + AddInputFromArray(TensorShape({b_rows, b_cols}), {0}); + // Here we set the error of a non scalar min_a value, so we expect to see an + // error when we run. 
+ AddInputFromArray(TensorShape({1}), {2}); + AddInputFromArray(TensorShape({}), {243.0f}); + AddInputFromArray(TensorShape({}), {1.0f}); + AddInputFromArray(TensorShape({}), {256.0f}); EXPECT_EQ(::tensorflow::error::INVALID_ARGUMENT, RunOpKernel().code()); } @@ -233,10 +271,10 @@ TEST_F(QuantizedMatMulTest, Small_WithParams) { 3, 6, }); - AddInputFromArray(TensorShape({1}), {-12.0f}); - AddInputFromArray(TensorShape({1}), {243.0f}); - AddInputFromArray(TensorShape({1}), {0}); - AddInputFromArray(TensorShape({1}), {255.0f}); + AddInputFromArray(TensorShape({}), {-12.0f}); + AddInputFromArray(TensorShape({}), {243.0f}); + AddInputFromArray(TensorShape({}), {0}); + AddInputFromArray(TensorShape({}), {255.0f}); TF_ASSERT_OK(RunOpKernel()); // We're requesting C = A.transposed() * B, // so we expect to get these results: @@ -326,10 +364,10 @@ TEST_F(QuantizedMatMulTest, Medium_WithParams) { AddInputFromArray(a_quantized.shape(), a_quantized.flat()); AddInputFromArray(b_quantized.shape(), b_quantized.flat()); - AddInputFromArray(TensorShape({1}), {a_min}); - AddInputFromArray(TensorShape({1}), {a_max}); - AddInputFromArray(TensorShape({1}), {b_min}); - AddInputFromArray(TensorShape({1}), {b_max}); + AddInputFromArray(TensorShape({}), {a_min}); + AddInputFromArray(TensorShape({}), {a_max}); + AddInputFromArray(TensorShape({}), {b_min}); + AddInputFromArray(TensorShape({}), {b_max}); TF_ASSERT_OK(RunOpKernel()); Tensor expected_float(DT_FLOAT, {a_cols, b_cols}); diff --git a/tensorflow/core/kernels/quantized_pooling_ops.cc b/tensorflow/core/kernels/quantized_pooling_ops.cc index b512369b3c4dd9..5673fb6ee00a5b 100644 --- a/tensorflow/core/kernels/quantized_pooling_ops.cc +++ b/tensorflow/core/kernels/quantized_pooling_ops.cc @@ -15,18 +15,18 @@ limitations under the License. // See docs in ../ops/nn_ops.cc. 
-#include "tensorflow/core/framework/op_requires.h" -#include "tensorflow/core/platform/errors.h" #define EIGEN_USE_THREADS #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/kernels/pooling_ops_common.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" @@ -67,8 +67,20 @@ class QuantizedAvgPoolingOp : public OpKernel { return; } - const float min_input = context->input(1).flat()(0); - const float max_input = context->input(2).flat()(0); + const Tensor& min_input_tensor = context->input(1); + const Tensor& max_input_tensor = context->input(2); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_input_tensor.shape()), + errors::InvalidArgument( + "min_input shape must be rank 0 but is rank ", + min_input_tensor.dims(), + ", received shape: ", min_input_tensor.shape())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_input_tensor.shape()), + errors::InvalidArgument( + "max_input shape must be rank 0 but is rank ", + max_input_tensor.dims(), + ", received shape: ", max_input_tensor.shape())); + const float min_input = context->input(1).scalar()(); + const float max_input = context->input(2).scalar()(); OP_REQUIRES(context, params.depth_window == 1, errors::Unimplemented("Non-spatial pooling is not " @@ -119,20 +131,20 @@ class QuantizedMaxPoolingOp : public MaxPoolingOp { : MaxPoolingOp(context) {} void Compute(OpKernelContext* context) override { - auto min_input_tensor = context->input(1); - auto max_input_tensor = context->input(2); - OP_REQUIRES( - context, min_input_tensor.NumElements() == 1, - errors::InvalidArgument( - "min_input must be a scalar float value, got tensor with shape ", - min_input_tensor.shape())); - OP_REQUIRES( - context, max_input_tensor.NumElements() == 1, - errors::InvalidArgument( - "max_input must be a scalar float value, got tensor with shape ", - max_input_tensor.shape())); - const float min_input = context->input(1).flat()(0); - const float max_input = context->input(2).flat()(0); + const Tensor& min_input_tensor = context->input(1); + const Tensor& max_input_tensor = context->input(2); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_input_tensor.shape()), + errors::InvalidArgument( + "min_input shape must be rank 0 but is rank ", + min_input_tensor.dims(), + ", received shape: ", min_input_tensor.shape())); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_input_tensor.shape()), + errors::InvalidArgument( + "max_input shape must be rank 0 but is rank ", + max_input_tensor.dims(), + ", received shape: ", max_input_tensor.shape())); + const float min_input = context->input(1).scalar()(); + const float max_input = context->input(2).scalar()(); MaxPoolingOp::Compute(context); Tensor* output_min = nullptr; OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); diff --git a/tensorflow/core/kernels/quantized_pooling_ops_test.cc b/tensorflow/core/kernels/quantized_pooling_ops_test.cc index fc0417e5431b27..9e56890478be24 100644 --- a/tensorflow/core/kernels/quantized_pooling_ops_test.cc +++ 
b/tensorflow/core/kernels/quantized_pooling_ops_test.cc @@ -69,8 +69,8 @@ TEST_F(QuantizedPoolingTest, SmallAveragePooling) { AddInputFromArray<quint8>(input_quantized.shape(), input_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {input_min}); - AddInputFromArray<float>(TensorShape({1}), {input_max}); + AddInputFromArray<float>(TensorShape({}), {input_min}); + AddInputFromArray<float>(TensorShape({}), {input_max}); TF_ASSERT_OK(RunOpKernel()); const Tensor& output_quantized = *GetOutput(0); const float output_min = GetOutput(1)->flat<float>()(0); @@ -114,8 +114,8 @@ TEST_F(QuantizedPoolingTest, SmallMaxPooling) { AddInputFromArray<quint8>(input_quantized.shape(), input_quantized.flat<quint8>()); - AddInputFromArray<float>(TensorShape({1}), {input_min}); - AddInputFromArray<float>(TensorShape({1}), {input_max}); + AddInputFromArray<float>(TensorShape({}), {input_min}); + AddInputFromArray<float>(TensorShape({}), {input_max}); TF_ASSERT_OK(RunOpKernel()); const Tensor& output_quantized = *GetOutput(0); const float output_min = GetOutput(1)->flat<float>()(0); diff --git a/tensorflow/core/kernels/ragged_range_op.cc b/tensorflow/core/kernels/ragged_range_op.cc index 066e5d638bbc43..469ef06b4b3bb6 100644 --- a/tensorflow/core/kernels/ragged_range_op.cc +++ b/tensorflow/core/kernels/ragged_range_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include #include @@ -78,8 +79,25 @@ class RaggedRangeOp : public OpKernel { T limit = broadcast_limits ? limits(0) : limits(row); T delta = broadcast_deltas ? deltas(0) : deltas(row); OP_REQUIRES(context, delta != 0, InvalidArgument("Requires delta != 0")); - rt_nested_splits(row + 1) = - rt_nested_splits(row) + RangeSize(start, limit, delta); + int64_t size; // The number of elements in the specified range. + if (((delta > 0) && (limit < start)) || + ((delta < 0) && (limit > start))) { + size = 0; + } else if (std::is_integral<T>::value) { + // The following is copied from tensorflow::RangeOp::Compute(). + size = Eigen::divup(Eigen::numext::abs(limit - start), + Eigen::numext::abs(delta)); + } else { + // The following is copied from tensorflow::RangeOp::Compute(). + auto size_auto = + Eigen::numext::ceil(Eigen::numext::abs((limit - start) / delta)); + OP_REQUIRES( + context, size_auto <= std::numeric_limits<int64_t>::max(), + errors::InvalidArgument("Requires ((limit - start) / delta) <= ", + std::numeric_limits<int64_t>::max())); + size = static_cast<int64_t>(size_auto); + } + rt_nested_splits(row + 1) = rt_nested_splits(row) + size; } SPLITS_TYPE nvals = rt_nested_splits(nrows); @@ -99,19 +117,6 @@ class RaggedRangeOp : public OpKernel { } } } - - private: - // Returns the number of elements in the specified range. - SPLITS_TYPE RangeSize(T start, T limit, T delta) { - if (((delta > 0) && (limit < start)) || ((delta < 0) && (limit > start))) { - return 0; - } - // The following is copied from tensorflow::RangeOp::Compute(). - return (std::is_integral<T>::value - ?
((std::abs(limit - start) + std::abs(delta) - 1) / - std::abs(delta)) - : std::ceil(std::abs((limit - start) / delta))); - } }; #define REGISTER_CPU_KERNEL(TYPE) \ diff --git a/tensorflow/core/kernels/ragged_range_op_test.cc b/tensorflow/core/kernels/ragged_range_op_test.cc index 94aaedde3420e0..fc3b302eeb7e30 100644 --- a/tensorflow/core/kernels/ragged_range_op_test.cc +++ b/tensorflow/core/kernels/ragged_range_op_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/shape_inference.h" @@ -77,6 +78,17 @@ TEST_F(RaggedRangeOpTest, FloatValues) { test::AsTensor({0, 2, 4, 6, 5, 6, 5, 4, 3, 2}), 0.1); } +TEST_F(RaggedRangeOpTest, RangeSizeOverflow) { + BuildRaggedRangeGraph(); + AddInputFromArray(TensorShape({2}), {1.1, 0.1}); // starts + AddInputFromArray(TensorShape({2}), {10.0, 1e10}); // limits + AddInputFromArray(TensorShape({2}), {1, 1e-10}); // deltas + + EXPECT_EQ(absl::StrCat("Requires ((limit - start) / delta) <= ", + std::numeric_limits::max()), + RunOpKernel().error_message()); +} + TEST_F(RaggedRangeOpTest, BroadcastDeltas) { BuildRaggedRangeGraph(); AddInputFromArray(TensorShape({3}), {0, 5, 8}); // starts diff --git a/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc b/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc index bf64bd6ce2dfb6..5b1adef77dfae2 100644 --- a/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc +++ b/tensorflow/core/kernels/ragged_tensor_to_variant_op.cc @@ -184,6 +184,10 @@ class RaggedTensorToVariantOp : public OpKernel { batched_ragged_input.mutable_nested_splits()->reserve( ragged_nested_splits_len); for (int i = 0; i < ragged_nested_splits_len; i++) { + OP_REQUIRES(context, ragged_nested_splits_in[i].dims() == 1, + errors::InvalidArgument("Requires nested_row_splits[", i, "]", + " to be rank 1 but is rank ", + ragged_nested_splits_in[i].dims())); batched_ragged_input.append_splits(ragged_nested_splits_in[i]); } diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index 8ec9ed7d24b081..b85c120eba9aa8 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -166,7 +166,7 @@ class RandomGammaOp : public OpKernel { } const int64_t samples_per_alpha = samples_shape.num_elements(); - samples_shape.AppendShape(alpha_t.shape()); + OP_REQUIRES_OK(ctx, samples_shape.AppendShapeWithStatus(alpha_t.shape())); // Allocate output samples. Tensor* samples_t = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, samples_shape, &samples_t)); diff --git a/tensorflow/core/kernels/random_poisson_op.cc b/tensorflow/core/kernels/random_poisson_op.cc index b4c4d5d95c1881..a14bee790753ca 100644 --- a/tensorflow/core/kernels/random_poisson_op.cc +++ b/tensorflow/core/kernels/random_poisson_op.cc @@ -296,8 +296,8 @@ class RandomPoissonOp : public OpKernel { TensorShape samples_shape; OP_REQUIRES_OK(ctx, tensor::MakeShape(shape_t, &samples_shape)); const int64_t num_samples = samples_shape.num_elements(); + OP_REQUIRES_OK(ctx, samples_shape.AppendShapeWithStatus(rate_t.shape())); - samples_shape.AppendShape(rate_t.shape()); // Allocate output samples. 
Tensor* samples_t = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, samples_shape, &samples_t)); diff --git a/tensorflow/core/kernels/requantize.cc b/tensorflow/core/kernels/requantize.cc index 3259e5ddd096aa..bc5de171639267 100644 --- a/tensorflow/core/kernels/requantize.cc +++ b/tensorflow/core/kernels/requantize.cc @@ -18,9 +18,11 @@ limitations under the License. #define EIGEN_USE_THREADS #include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/type_traits.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/meta_support.h" @@ -38,10 +40,34 @@ class RequantizeOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); - const float input_min_float = ctx->input(1).flat()(0); - const float input_max_float = ctx->input(2).flat()(0); - const float requested_output_min_float = ctx->input(3).flat()(0); - const float requested_output_max_float = ctx->input(4).flat()(0); + + const Tensor& input_min = ctx->input(1); + const Tensor& input_max = ctx->input(2); + const Tensor& requested_output_min = ctx->input(3); + const Tensor& requested_output_max = ctx->input(4); + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_min.shape()), + errors::InvalidArgument("`input_min` must be rank 0 but is rank ", + input_min.dims())); + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_max.shape()), + errors::InvalidArgument("`input_max` must be rank 0 but is rank ", + input_max.dims())); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(requested_output_min.shape()), + errors::InvalidArgument( + "`requested_output_min` must be rank 0 but is rank ", + requested_output_min.dims())); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(requested_output_max.shape()), + errors::InvalidArgument( + "`requested_output_max` must be rank 0 but is rank ", + requested_output_max.dims())); + + const float input_min_float = input_min.flat()(0); + const float input_max_float = input_max.flat()(0); + const float requested_output_min_float = + requested_output_min.flat()(0); + const float requested_output_max_float = + requested_output_max.flat()(0); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output)); diff --git a/tensorflow/core/kernels/requantize_op_test.cc b/tensorflow/core/kernels/requantize_op_test.cc index 133f92b029d8da..5663520fdbfda9 100644 --- a/tensorflow/core/kernels/requantize_op_test.cc +++ b/tensorflow/core/kernels/requantize_op_test.cc @@ -53,10 +53,10 @@ TEST_F(RequantizeTest, HandCraftedRequantize) { // Requantize to -1 to 1. 
AddInputFromArray(TensorShape({value_count}), {-(1 << 23), 0, (1 << 23)}); - AddInputFromArray(TensorShape({1}), {-256.0f}); - AddInputFromArray(TensorShape({1}), {256.0f}); - AddInputFromArray(TensorShape({1}), {-1.0f}); - AddInputFromArray(TensorShape({1}), {1.0f}); + AddInputFromArray(TensorShape({}), {-256.0f}); + AddInputFromArray(TensorShape({}), {256.0f}); + AddInputFromArray(TensorShape({}), {-1.0f}); + AddInputFromArray(TensorShape({}), {1.0f}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_QUINT8, TensorShape({value_count})); test::FillValues(&expected, {0, 128, 255}); @@ -71,10 +71,10 @@ TEST_F(RequantizeTest, InvalidOutputMin) { AddInputFromArray(TensorShape({value_count}), {-(1 << 23), 0, (1 << 23)}); - AddInputFromArray(TensorShape({1}), {-256.0f}); - AddInputFromArray(TensorShape({1}), {256.0f}); - AddInputFromArray(TensorShape({1}), {0.01f}); - AddInputFromArray(TensorShape({1}), {1.0f}); + AddInputFromArray(TensorShape({}), {-256.0f}); + AddInputFromArray(TensorShape({}), {256.0f}); + AddInputFromArray(TensorShape({}), {0.01f}); + AddInputFromArray(TensorShape({}), {1.0f}); EXPECT_EQ("requested_output_min must be <= 0, but got 0.01", RunOpKernel().error_message()); } @@ -85,10 +85,10 @@ TEST_F(RequantizeTest, InvalidOutputMax) { AddInputFromArray(TensorShape({value_count}), {-(1 << 23), 0, (1 << 23)}); - AddInputFromArray(TensorShape({1}), {-256.0f}); - AddInputFromArray(TensorShape({1}), {256.0f}); - AddInputFromArray(TensorShape({1}), {-10.0f}); - AddInputFromArray(TensorShape({1}), {-11.0f}); + AddInputFromArray(TensorShape({}), {-256.0f}); + AddInputFromArray(TensorShape({}), {256.0f}); + AddInputFromArray(TensorShape({}), {-10.0f}); + AddInputFromArray(TensorShape({}), {-11.0f}); EXPECT_EQ( "requested_output_max must be >= requested_output_min, but got -11 and " "-10", diff --git a/tensorflow/core/kernels/reshape_op.h b/tensorflow/core/kernels/reshape_op.h index cd8ffefdff2274..9f6dd2c156cea2 100644 --- a/tensorflow/core/kernels/reshape_op.h +++ b/tensorflow/core/kernels/reshape_op.h @@ -45,6 +45,11 @@ class ReshapeOp : public OpKernel { TensorShapeUtils::IsScalar(sizes.shape())), errors::InvalidArgument("sizes input must be 1-D, not ", sizes.shape().DebugString())); + OP_REQUIRES( + context, sizes.NumElements() < TensorShape::MaxDimensions(), + errors::InvalidArgument("too many dimensions: must be < ", + TensorShape::MaxDimensions(), ", but received ", + sizes.NumElements())); // Compute the output shape. Determine product of specified // dimensions, and find the index of the unspecified one. 
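Editorial note (not part of the patch): the ReshapeOp change above bounds the element count of the `sizes` input before any output shape is assembled. A small sketch of that guard follows; the helper name CheckReshapeSizesCount is hypothetical, while TensorShape::MaxDimensions() and errors::InvalidArgument are existing TensorFlow APIs.

#include <cstdint>

#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"

namespace tensorflow {

// Rejects a `sizes` vector whose element count meets or exceeds the maximum
// number of dimensions a TensorShape may hold, mirroring the new OP_REQUIRES
// in ReshapeOp::Compute.
Status CheckReshapeSizesCount(int64_t num_sizes) {
  if (num_sizes >= TensorShape::MaxDimensions()) {
    return errors::InvalidArgument("too many dimensions: must be < ",
                                   TensorShape::MaxDimensions(),
                                   ", but received ", num_sizes);
  }
  return Status::OK();
}

}  // namespace tensorflow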
diff --git a/tensorflow/core/kernels/rnn/lstm_ops.cc b/tensorflow/core/kernels/rnn/lstm_ops.cc index ab4b9c695a5699..b0a27c1914af74 100644 --- a/tensorflow/core/kernels/rnn/lstm_ops.cc +++ b/tensorflow/core/kernels/rnn/lstm_ops.cc @@ -1138,19 +1138,30 @@ class BlockLSTMGradOp : public OpKernel { const Tensor* x; OP_REQUIRES_OK(ctx, ctx->input("x", &x)); - OP_REQUIRES(ctx, x->dims() == 3, errors::InvalidArgument("x must be 3D")); + OP_REQUIRES( + ctx, x->dims() == 3, + errors::InvalidArgument("x must be rank 3 but is rank ", x->dims())); const int64_t timelen = x->dim_size(0); const int64_t batch_size = x->dim_size(1); const int64_t input_size = x->dim_size(2); const Tensor* cs_prev_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("cs_prev", &cs_prev_tensor)); + OP_REQUIRES(ctx, cs_prev_tensor->dims() == 2, + errors::InvalidArgument("cs_prev must be rank 2 but is rank ", + cs_prev_tensor->dims())); const Tensor* h_prev_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("h_prev", &h_prev_tensor)); + OP_REQUIRES(ctx, h_prev_tensor->dims() == 2, + errors::InvalidArgument("h_prev must be rank 2 but is rank ", + h_prev_tensor->dims())); const Tensor* w_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("w", &w_tensor)); + OP_REQUIRES(ctx, w_tensor->dims() == 2, + errors::InvalidArgument("w must be rank 2 but is rank ", + w_tensor->dims())); const int64_t cell_size = w_tensor->dim_size(1) / 4; OP_REQUIRES(ctx, input_size + cell_size == w_tensor->dim_size(0), errors::InvalidArgument( @@ -1159,15 +1170,27 @@ class BlockLSTMGradOp : public OpKernel { const Tensor* wci_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("wci", &wci_tensor)); + OP_REQUIRES(ctx, wci_tensor->dims() == 1, + errors::InvalidArgument("wci must be rank 1 but is rank ", + wci_tensor->dims())); const Tensor* wcf_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("wcf", &wcf_tensor)); + OP_REQUIRES(ctx, wcf_tensor->dims() == 1, + errors::InvalidArgument("wcf must be rank 1 but is rank ", + wcf_tensor->dims())); const Tensor* wco_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("wco", &wco_tensor)); + OP_REQUIRES(ctx, wco_tensor->dims() == 1, + errors::InvalidArgument("wco must be rank 1 but is rank ", + wco_tensor->dims())); const Tensor* b_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->input("b", &b_tensor)); + OP_REQUIRES(ctx, b_tensor->dims() == 1, + errors::InvalidArgument("b must be rank 1 but is rank ", + b_tensor->dims())); OP_REQUIRES( ctx, cell_size == b_tensor->dim_size(0) / 4, errors::InvalidArgument("w and b cell_size don't match: ", cell_size, diff --git a/tensorflow/core/kernels/sdca_internal.cc b/tensorflow/core/kernels/sdca_internal.cc index 58d83f6936a8a0..b2a9bc630af6e1 100644 --- a/tensorflow/core/kernels/sdca_internal.cc +++ b/tensorflow/core/kernels/sdca_internal.cc @@ -389,6 +389,13 @@ Status Examples::Initialize(OpKernelContext* const context, OpInputList dense_features_inputs; TF_RETURN_IF_ERROR( context->input_list("dense_features", &dense_features_inputs)); + for (int i = 0; i < dense_features_inputs.size(); ++i) { + if (!TensorShapeUtils::IsMatrix(dense_features_inputs[i].shape())) { + return errors::InvalidArgument("Dense features at index ", i, + " must be rank 2 but is rank ", + dense_features_inputs[i].dims()); + } + } examples_.clear(); examples_.resize(num_examples); diff --git a/tensorflow/core/kernels/sdca_ops.cc b/tensorflow/core/kernels/sdca_ops.cc index 98b4fd1c82b239..d279eda86e741c 100644 --- a/tensorflow/core/kernels/sdca_ops.cc +++ b/tensorflow/core/kernels/sdca_ops.cc @@ -49,6 +49,7 @@ limitations 
under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" @@ -142,6 +143,10 @@ void DoCompute(const ComputeOptions& options, OpKernelContext* const context) { const Tensor* example_state_data_t; OP_REQUIRES_OK(context, context->input("example_state_data", &example_state_data_t)); + OP_REQUIRES( + context, TensorShapeUtils::IsMatrix(example_state_data_t->shape()), + errors::InvalidArgument("example_state_data must be rank 2 but is rank ", + example_state_data_t->dims())); TensorShape expected_example_state_shape({examples.num_examples(), 4}); OP_REQUIRES(context, example_state_data_t->shape() == expected_example_state_shape, diff --git a/tensorflow/core/kernels/searchsorted_op.cc b/tensorflow/core/kernels/searchsorted_op.cc index 9ab4bb93da72be..4a587001bb6c8b 100644 --- a/tensorflow/core/kernels/searchsorted_op.cc +++ b/tensorflow/core/kernels/searchsorted_op.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/fill_functor.h" #include "tensorflow/core/lib/core/bits.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/threadpool.h" @@ -129,6 +130,14 @@ class UpperBoundOp : public OpKernel { auto output = output_t->template flat(); const auto sorted_inputs = sorted_inputs_t.template flat(); const auto values = values_t.template flat(); + + // For empty inputs, all values will be placed at the zeroth position. + if (sorted_inputs.size() == 0) { + functor::SetZeroFunctor set_zero; + set_zero(ctx->eigen_device(), output); + return; + } + OP_REQUIRES_OK( ctx, functor::UpperBoundFunctor::Compute( ctx, sorted_inputs, values, sorted_inputs_t.dim_size(0), @@ -174,6 +183,14 @@ class LowerBoundOp : public OpKernel { auto output = output_t->template flat(); const auto sorted_inputs = sorted_inputs_t.template flat(); const auto values = values_t.template flat(); + + // For empty inputs, all values will be placed at the zeroth position. + if (sorted_inputs.size() == 0) { + functor::SetZeroFunctor set_zero; + set_zero(ctx->eigen_device(), output); + return; + } + OP_REQUIRES_OK( ctx, functor::LowerBoundFunctor::Compute( ctx, sorted_inputs, values, sorted_inputs_t.dim_size(0), diff --git a/tensorflow/core/kernels/set_kernels.cc b/tensorflow/core/kernels/set_kernels.cc index 74b5b543b7c7e3..0e610ac4136a56 100644 --- a/tensorflow/core/kernels/set_kernels.cc +++ b/tensorflow/core/kernels/set_kernels.cc @@ -70,8 +70,12 @@ Status SparseTensorFromContext(OpKernelContext* ctx, const int32_t base_index, sparse::SparseTensor* tensor) { // Assume row-major order. 
TensorShape shape; - TF_RETURN_IF_ERROR(TensorShape::BuildTensorShape( - ctx->input(base_index + 2).vec(), &shape)); + const Tensor& shape_tensor = ctx->input(base_index + 2); + if (shape_tensor.dims() != 1) { + return errors::InvalidArgument("Shape must be a 1D tensor."); + } + TF_RETURN_IF_ERROR( + TensorShape::BuildTensorShape(shape_tensor.vec(), &shape)); CheckRankAtLeast2(ctx, shape); std::vector order(shape.dims()); std::iota(order.begin(), order.end(), 0); diff --git a/tensorflow/core/kernels/sobol_op.cc b/tensorflow/core/kernels/sobol_op.cc index 94fff6baea2688..484bac99463300 100644 --- a/tensorflow/core/kernels/sobol_op.cc +++ b/tensorflow/core/kernels/sobol_op.cc @@ -24,6 +24,7 @@ limitations under the License. #include "sobol_data.h" // from @sobol_data #include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/platform_strings.h" @@ -134,8 +135,14 @@ class SobolSampleOp : public OpKernel { : OpKernel(context) {} void Compute(OpKernelContext* context) override { + OP_REQUIRES(context, TensorShapeUtils::IsScalar(context->input(0).shape()), + errors::InvalidArgument("dim must be a scalar")); int32_t dim = context->input(0).scalar()(); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(context->input(1).shape()), + errors::InvalidArgument("num_results must be a scalar")); int32_t num_results = context->input(1).scalar()(); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(context->input(2).shape()), + errors::InvalidArgument("skip must be a scalar")); int32_t skip = context->input(2).scalar()(); OP_REQUIRES(context, dim >= 1, diff --git a/tensorflow/core/kernels/sparse/sparse_matrix.h b/tensorflow/core/kernels/sparse/sparse_matrix.h index 3476aa48f5d052..5e70b07ff8a7ed 100644 --- a/tensorflow/core/kernels/sparse/sparse_matrix.h +++ b/tensorflow/core/kernels/sparse/sparse_matrix.h @@ -25,10 +25,12 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_encode_decode.h" #include "tensorflow/core/framework/variant_op_registry.h" +#include "tensorflow/core/platform/errors.h" namespace tensorflow { @@ -633,6 +635,11 @@ template Status ExtractVariantFromInput(OpKernelContext* ctx, int index, const T** value) { const Tensor& input_t = ctx->input(index); + if (!TensorShapeUtils::IsScalar(input_t.shape())) { + return errors::InvalidArgument( + "Invalid input matrix: Shape must be rank 0 but is rank ", + input_t.dims()); + } const Variant& input_variant = input_t.scalar()(); *value = input_variant.get(); if (*value == nullptr) { diff --git a/tensorflow/core/kernels/sparse_cross_op.cc b/tensorflow/core/kernels/sparse_cross_op.cc index 09c503e2ec216b..f4fa54a670e22c 100644 --- a/tensorflow/core/kernels/sparse_cross_op.cc +++ b/tensorflow/core/kernels/sparse_cross_op.cc @@ -24,12 +24,14 @@ limitations under the License. 
#include "tensorflow/core/framework/kernel_def_builder.h" #include "tensorflow/core/framework/op_def_builder.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_requires.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/strong_hash.h" #include "tensorflow/core/util/work_sharder.h" @@ -832,6 +834,10 @@ class SparseCrossV2Op : public OpKernel { const Tensor* sep_t; OP_REQUIRES_OK(context, context->input("sep", &sep_t)); + OP_REQUIRES(context, TensorShapeUtils::IsScalar(sep_t->shape()), + errors::InvalidArgument("Input separator should be a scalar. " + "Received: ", + sep_t->DebugString())); const tstring separator = sep_t->scalar()(); std::vector>> columns = diff --git a/tensorflow/core/kernels/sparse_fill_empty_rows_op_gpu.cu.cc b/tensorflow/core/kernels/sparse_fill_empty_rows_op_gpu.cu.cc index 8ef4ce6172f367..2efa88106ab523 100644 --- a/tensorflow/core/kernels/sparse_fill_empty_rows_op_gpu.cu.cc +++ b/tensorflow/core/kernels/sparse_fill_empty_rows_op_gpu.cu.cc @@ -297,9 +297,12 @@ struct SparseFillEmptyRows { empty_row_indicator = empty_row_indicator_t.vec().data(); } - TF_RETURN_IF_ERROR(wrap_kernel_call(ComputeEmptyRowIndicatorKernel, - /*device=*/device, /*size=*/dense_rows, - elements_per_row, empty_row_indicator)); + if (dense_rows > 0) { + TF_RETURN_IF_ERROR( + wrap_kernel_call(ComputeEmptyRowIndicatorKernel, + /*device=*/device, /*size=*/dense_rows, + elements_per_row, empty_row_indicator)); + } // For each row, the number of empty rows up to and including that row. Tensor num_empty_rows_through_t; @@ -405,14 +408,16 @@ struct SparseFillEmptyRows { done); } - OP_REQUIRES_OK_ASYNC( - context, - wrap_kernel_call(ScatterNewElementsKernel, - /*device=*/device, /*size=*/dense_rows, rank, - default_value, num_empty_rows_through, - input_row_ends, empty_row_indicator, output_indices, - output_values), - done); + if (dense_rows > 0) { + OP_REQUIRES_OK_ASYNC( + context, + wrap_kernel_call(ScatterNewElementsKernel, + /*device=*/device, /*size=*/dense_rows, rank, + default_value, num_empty_rows_through, + input_row_ends, empty_row_indicator, + output_indices, output_values), + done); + } done(); }; @@ -461,9 +466,11 @@ struct SparseFillEmptyRows { TF_RETURN_IF_ERROR( context->allocate_temp(index_type, TensorShape({N}), &row_indices_t)); auto row_indices = row_indices_t.flat(); - TF_RETURN_IF_ERROR(wrap_kernel_call(CopyRowIndicesKernel, - /*device=*/device, /*size=*/N, rank, - indices, row_indices)); + if (N > 0) { + TF_RETURN_IF_ERROR(wrap_kernel_call(CopyRowIndicesKernel, + /*device=*/device, /*size=*/N, rank, + indices, row_indices)); + } // Allocate input_index_map. 
TF_RETURN_IF_ERROR(context->allocate_temp(index_type, TensorShape({N}), input_index_map_t)); @@ -528,9 +535,11 @@ struct SparseFillEmptyRowsGrad { auto visited = visited_t.vec(); visited.device(device) = visited.constant(false); - TF_RETURN_IF_ERROR(wrap_kernel_call( - GatherOriginalGradValuesKernel, /*device=*/device, - /*size=*/N, reverse_index_map, grad_values, d_values, visited)); + if (N > 0) { + TF_RETURN_IF_ERROR(wrap_kernel_call( + GatherOriginalGradValuesKernel, /*device=*/device, + /*size=*/N, reverse_index_map, grad_values, d_values, visited)); + } // Now we mask out the visited values and sum the remaining ones (which // correspond to the empty rows in the forward input) to compute diff --git a/tensorflow/core/kernels/sparse_utils.cc b/tensorflow/core/kernels/sparse_utils.cc index 75e42eebe25bef..ff340f81eb44e8 100644 --- a/tensorflow/core/kernels/sparse_utils.cc +++ b/tensorflow/core/kernels/sparse_utils.cc @@ -16,8 +16,12 @@ limitations under the License. #include "tensorflow/core/kernels/sparse_utils.h" #include +#include #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/status.h" namespace tensorflow { namespace sparse_utils { @@ -140,6 +144,165 @@ bool ContainsEmptyRows(const std::vector& row_start_indices) { return false; } +namespace { + +// Ensures indices, values, shape are all of the proper ranks and are +// compatible. +Status ValidateSparseTensorShape(const Tensor& indices, const Tensor& values, + const Tensor& shape) { + // Indices must be a matrix, and values/shape must be a vector. + if (!TensorShapeUtils::IsMatrix(indices.shape())) { + return errors::InvalidArgument("Sparse indices must be rank 2 but is rank ", + indices.shape().dim_sizes().size()); + } + if (!TensorShapeUtils::IsVector(values.shape())) { + return errors::InvalidArgument("Sparse values must be rank 1 but is rank ", + values.shape().dims()); + } + if (!TensorShapeUtils::IsVector(shape.shape())) { + return errors::InvalidArgument("Sparse shape must be rank 1 but is rank ", + shape.shape().dims()); + } + // Indices shape must be compatible with the values vector and dense shape. + int64_t nnz = indices.dim_size(0); + int64_t ndims = indices.dim_size(1); + if (values.dim_size(0) != nnz) { + return errors::InvalidArgument("Number of elements in indices (", nnz, + ") and values (", values.dim_size(0), + ") do not match"); + } + if (shape.NumElements() != ndims) { + return errors::InvalidArgument("Index rank (", ndims, ") and shape rank (", + shape.NumElements(), ") do not match"); + } + + return Status::OK(); +} + +// Creates a debug string for the index tuple in indices(row, :). +template +string CreateIndexString(const IndexTensor& indices, int64_t row) { + const int64_t ndims = indices.dimension(1); + string index_str = strings::StrCat("indices[", row, ", :] = ["); + for (int64_t dim = 0; dim < ndims; ++dim) { + strings::StrAppend(&index_str, indices(row, dim), + dim < ndims - 1 ? ", " : "]"); + } + if (ndims == 0) { + strings::StrAppend(&index_str, "]"); + } + return index_str; +} + +// Ensures all sparse indices are within correct bounds. +template +Status ValidateSparseTensorIndicesUnordered(const Tensor& indices, + const Tensor& shape) { + // Ensure no index is out-of-bounds. 
+ const auto indices_mat = indices.flat_inner_dims(); + const auto shape_vec = shape.flat(); + int64_t nnz = indices.dim_size(0); + int64_t ndims = indices.dim_size(1); + + for (int64_t i = 0; i < nnz; ++i) { + for (int64_t dim = 0; dim < ndims; ++dim) { + const Tindices idx = indices_mat(i, dim); + if (TF_PREDICT_FALSE(idx < 0 || idx >= shape_vec(dim))) { + string index_str = CreateIndexString(indices_mat, i); + return errors::InvalidArgument("Sparse index tuple ", index_str, + " is out of bounds"); + } + } + } + + return Status::OK(); +} + +// Ensures all sparse indices are within correct bounds and are +// lexicographically ordered. +template +Status ValidateSparseTensorIndicesOrdered(const Tensor& indices, + const Tensor& shape) { + const auto indices_mat = indices.flat_inner_dims(); + const auto shape_vec = shape.flat(); + int64_t nnz = indices.dim_size(0); + int64_t ndims = indices.dim_size(1); + + if (nnz == 0) { + return Status::OK(); + } + + // First set of indices must be within range. + for (int64_t dim = 0; dim < ndims; ++dim) { + const Tindices idx = indices_mat(0, dim); + if (TF_PREDICT_FALSE(idx < 0 || idx >= shape_vec(dim))) { + string index_str = CreateIndexString(indices_mat, 0); + return errors::InvalidArgument("Sparse index tuple ", index_str, + " is out of bounds"); + } + } + + // Remaining set of indices must be within range and lexicographically + // larger than the previous. + for (int64_t i = 1; i < nnz; ++i) { + bool different = false; + for (int64_t dim = 0; dim < ndims; ++dim) { + const Tindices idx = indices_mat(i, dim); + const Tindices prev_idx = indices_mat(i - 1, dim); + // If indices are already different from previous i, the new index can + // be anything within the valid range. + if (TF_PREDICT_TRUE(different)) { + if (TF_PREDICT_FALSE(idx < 0 || idx >= shape_vec(dim))) { + string index_str = CreateIndexString(indices_mat, i); + return errors::InvalidArgument("Sparse index tuple ", index_str, + " is out of bounds"); + } + } else { + // Otherwise, the new index must be >= previous and <= shape(dim). + if (TF_PREDICT_FALSE(idx < prev_idx || idx >= shape_vec(dim))) { + string index_str = CreateIndexString(indices_mat, i); + // Check if index is actually out of bounds. 
+ if (TF_PREDICT_FALSE(idx < 0 || idx >= shape_vec(dim))) { + return errors::InvalidArgument("Sparse index tuple ", index_str, + " is out of bounds"); + } else { + return errors::InvalidArgument("Sparse index tuple ", index_str, + " is out of order"); + } + } else if (TF_PREDICT_TRUE(idx > prev_idx)) { + different = true; + } + } // if (different) + } // for dim in [0, ndims) + + if (TF_PREDICT_FALSE(!different)) { + string index_str = CreateIndexString(indices_mat, i); + return errors::InvalidArgument("Sparse index tuple ", index_str, + " is repeated"); + } + } // for i in [1, nnz) + + return Status::OK(); +} + +} // namespace + +template +Status ValidateSparseTensor(const Tensor& indices, const Tensor& values, + const Tensor& shape, + IndexValidation index_validation) { + TF_RETURN_IF_ERROR(ValidateSparseTensorShape(indices, values, shape)); + switch (index_validation) { + case IndexValidation::kOrdered: + return ValidateSparseTensorIndicesOrdered(indices, shape); + case IndexValidation::kUnordered: + return ValidateSparseTensorIndicesUnordered(indices, shape); + case IndexValidation::kNone: { + } + } + return Status::OK(); +} + #define REGISTER_SPARSE_UTIL_FUNCTIONS(TypeIndex) \ template TypeIndex FindNextDenseRowStartIndex( \ const TypeIndex sparse_index_begin, \ @@ -151,7 +314,10 @@ bool ContainsEmptyRows(const std::vector& row_start_indices) { const std::vector& row_start_indices); \ template std::vector ParseRowStartIndices( \ const tensorflow::Tensor& tensor, \ - const TypeIndex num_nonzero_entries_in_sparse_mat); + const TypeIndex num_nonzero_entries_in_sparse_mat); \ + template Status ValidateSparseTensor( \ + const Tensor& indices, const Tensor& values, const Tensor& shape, \ + IndexValidation index_validation) REGISTER_SPARSE_UTIL_FUNCTIONS(int32); REGISTER_SPARSE_UTIL_FUNCTIONS(int64); diff --git a/tensorflow/core/kernels/sparse_utils.h b/tensorflow/core/kernels/sparse_utils.h index d43b2e34470a5e..4e6ab744691c28 100644 --- a/tensorflow/core/kernels/sparse_utils.h +++ b/tensorflow/core/kernels/sparse_utils.h @@ -65,6 +65,23 @@ std::vector ParseRowStartIndices( template bool ContainsEmptyRows(const std::vector& row_start_indices); +// Methods for validating sparse indices. +enum class IndexValidation { + kNone, // Indices are not used by the op, or are not directly accessible + // (e.g. on GPU). + kOrdered, // Indices must be unique, in lexicographical order, and within + // safe bounds. + kUnordered // Indices must be within safe bounds, but may repeat or appear + // out-of-order. +}; + +// Validates the three component tensors of a sparse tensor have the proper +// shapes. Also validates index values according to the method supplied. +template +Status ValidateSparseTensor(const Tensor& indices, const Tensor& values, + const Tensor& shape, + IndexValidation index_validation); + } // namespace sparse_utils } // namespace tensorflow diff --git a/tensorflow/core/kernels/sparse_utils_test.cc b/tensorflow/core/kernels/sparse_utils_test.cc index c480829237119c..1bdf79ea956571 100644 --- a/tensorflow/core/kernels/sparse_utils_test.cc +++ b/tensorflow/core/kernels/sparse_utils_test.cc @@ -15,27 +15,29 @@ limitations under the License. 
#include "tensorflow/core/kernels/sparse_utils.h" +#include +#include +#include +#include #include +#include "absl/container/flat_hash_set.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/random/philox_random.h" +#include "tensorflow/core/lib/random/simple_philox.h" +#include "tensorflow/core/platform/status_matchers.h" #include "tensorflow/core/platform/test.h" +namespace tensorflow { +namespace sparse_utils { namespace { -using ::int64_t; -using tensorflow::DataType; -using tensorflow::int32; -using tensorflow::Tensor; -using tensorflow::TTypes; -using tensorflow::uint16; -using tensorflow::uint32; -using tensorflow::uint64; -using tensorflow::sparse_utils::ContainsEmptyRows; -using tensorflow::sparse_utils::FindNextDenseRowStartIndex; -using tensorflow::sparse_utils::GetStartIndicesOfEachDenseRow; -using tensorflow::sparse_utils::ParseRowStartIndices; +using ::tensorflow::testing::StatusIs; +using ::testing::MatchesRegex; TEST(SparseUtilsTest, GetStartIndicesOfEachDenseRow) { { @@ -260,4 +262,289 @@ TEST(SparseUtilsTest, FindNextDenseRowStartIndex) { } } +// Returns a shared random number generator. +::tensorflow::random::SimplePhilox& RandomPhilox() { + // Safe initialization of static random generator. + static auto* philox = + new ::tensorflow::random::PhiloxRandom(tensorflow::testing::RandomSeed()); + static auto* rnd = new ::tensorflow::random::SimplePhilox(philox); + return *rnd; +} + +// Fills a tensor of indices with a unique set of random index tuples. +// The `SetType` must be a std::set-like type (e.g. flat_hash_set, btree_set) +// that is used to ensure uniqueness and governs the final index tuple order. +// For example, use a hash set for unordered indices, and sorted set for +// lexicographically ordered indices. The `shape` is used to ensure proper index +// bounds. +template +void FillIndicesWithRandomTuples(const TensorShape& shape, Tensor& indices) { + const int64_t nnz = indices.dim_size(0); + const int64_t ndims = indices.dim_size(1); + + SetType indices_set; + int64_t count = 0; + // Generate nnz unique random tuples. + while (count < nnz) { + std::vector candidate(ndims); + for (int64_t d = 0; d < ndims; ++d) { + candidate[d] = RandomPhilox().Uniform64(shape.dim_size(d)); + } + auto it = indices_set.insert(std::move(candidate)); + if (it.second) { + ++count; + } + } + + // Copy index tuples from set into index tensor. + auto indices_mat = indices.matrix(); + int64_t row = 0; + for (const std::vector& idxs : indices_set) { + for (int64_t col = 0; col < ndims; ++col) { + indices_mat(row, col) = idxs[col]; + } + ++row; + } +} + +// Populates components of a sparse random tensor with provided number of +// non-zeros `max_nnz` and tensor shape `shape`. If `ordered`, output indices +// are ordered lexicographically. +void GenerateRandomSparseTensor(int64_t max_nnz, const TensorShape& shape, + bool ordered, Tensor& output_indices, + Tensor& output_values, Tensor& output_shape) { + const int64_t ndims = shape.dims(); + // We cannot generate more elements than the total in the tensor, so + // potentially reduce nnz. 
+ const int64_t nnz = std::min(shape.num_elements(), max_nnz); + output_indices = Tensor(DT_INT64, TensorShape({nnz, ndims})); + output_values = Tensor(DT_FLOAT, TensorShape({nnz})); + output_shape = Tensor(DT_INT64, TensorShape({ndims})); + + // Generate random unique sparse indices. + if (ordered) { + // NOTE: absl::btree_set does not seem to be available in TF OSS. + FillIndicesWithRandomTuples>>(shape, + output_indices); + } else { + FillIndicesWithRandomTuples>>( + shape, output_indices); + } + + auto values_vec = output_values.vec(); + values_vec.setRandom(); + + auto shape_vec = output_shape.vec(); + for (int i = 0; i < shape.dims(); ++i) { + shape_vec(i) = shape.dim_size(i); + } +} + +using ValidateSparseTensorTest = ::testing::TestWithParam; + +TEST_P(ValidateSparseTensorTest, ValidSparseTensorPasses) { + constexpr int kNumNonZeros = 1000; + const TensorShape kTensorShapes[] = { + {}, {3}, {4, 5}, {6, 7, 8}, {9, 10, 11, 12}}; + const IndexValidation index_validation = GetParam(); + const bool ordered = (index_validation == IndexValidation::kOrdered); + for (const TensorShape& test_shape : kTensorShapes) { + Tensor indices, values, shape; + GenerateRandomSparseTensor(kNumNonZeros, test_shape, ordered, indices, + values, shape); + TF_EXPECT_OK((ValidateSparseTensor(indices, values, shape, + index_validation))); + } +} + +TEST_P(ValidateSparseTensorTest, InvalidIndicesRankFails) { + constexpr int kNumNonZeros = 1000; + constexpr int kNumDims = 3; + // Indices tensor must be rank 2, so try rank 0, 1, 3. + const TensorShape kInvalidIndicesShapes[] = { + {}, {kNumNonZeros}, {kNumNonZeros, kNumDims, 4}}; + const IndexValidation index_validation = GetParam(); + for (const TensorShape& invalid_shape : kInvalidIndicesShapes) { + const Tensor indices = Tensor(DT_INT64, invalid_shape); + const Tensor values = Tensor(DT_FLOAT, TensorShape({kNumNonZeros})); + const Tensor shape = Tensor(DT_INT64, TensorShape({kNumDims})); + EXPECT_THAT((ValidateSparseTensor(indices, values, shape, + index_validation)), + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Sparse indices must be rank 2 .*"))); + } +} + +TEST_P(ValidateSparseTensorTest, InvalidValuesRankFails) { + constexpr int kNumNonZeros = 1000; + constexpr int kNumDims = 3; + // Values tensor must be rank 1, so try rank 0, 2. + const TensorShape kInvalidValuesShapes[] = {{}, {kNumNonZeros, 2}}; + const IndexValidation index_validation = GetParam(); + for (const TensorShape& invalid_shape : kInvalidValuesShapes) { + const Tensor indices = + Tensor(DT_INT64, TensorShape({kNumNonZeros, kNumDims})); + const Tensor values = Tensor(DT_FLOAT, invalid_shape); + const Tensor shape = Tensor(DT_INT64, TensorShape({kNumDims})); + EXPECT_THAT((ValidateSparseTensor(indices, values, shape, + index_validation)), + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Sparse values must be rank 1 .*"))); + } +} + +TEST_P(ValidateSparseTensorTest, InvalidShapeRankFails) { + constexpr int kNumNonZeros = 1000; + constexpr int kNumDims = 3; + const IndexValidation index_validation = GetParam(); + // Shape tensor must be rank 1, so try rank 0, 2. 
+ const TensorShape kInvalidShapeShapes[] = {{}, {kNumDims, 2}}; + for (const TensorShape& invalid_shape : kInvalidShapeShapes) { + const Tensor indices = + Tensor(DT_INT64, TensorShape({kNumNonZeros, kNumDims})); + const Tensor values = Tensor(DT_FLOAT, TensorShape({kNumNonZeros})); + const Tensor shape = Tensor(DT_INT64, invalid_shape); + EXPECT_THAT((ValidateSparseTensor(indices, values, shape, + index_validation)), + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Sparse shape must be rank 1 .*"))); + } +} + +TEST_P(ValidateSparseTensorTest, IncompatibleShapesFails) { + constexpr int kNumNonZeros = 1000; + constexpr int kNumDims = 3; + const IndexValidation index_validation = GetParam(); + + const Tensor values = Tensor(DT_FLOAT, TensorShape({kNumNonZeros})); + const Tensor shape = Tensor(DT_INT64, TensorShape({kNumDims})); + + // Indices and values must have the same size in dimension 0 (nnz). + { + const Tensor indices = + Tensor(DT_INT64, TensorShape({kNumNonZeros + 1, kNumDims})); + EXPECT_THAT((ValidateSparseTensor(indices, values, shape, + index_validation)), + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Number of elements in indices .* and " + "values .* do not match"))); + } + + // Each index tuple must have the same size in dimension 1 as the dense + // tensor shape (ndims). + { + const Tensor indices = + Tensor(DT_INT64, TensorShape({kNumNonZeros, kNumDims + 1})); + EXPECT_THAT( + (ValidateSparseTensor(indices, values, shape, + index_validation)), + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Index rank .* and shape rank .* do not match"))); + } +} + +TEST_P(ValidateSparseTensorTest, IndexOutOfBoundsFails) { + constexpr int kNumNonZeros = 1000; + constexpr int kNumTests = 100; + const IndexValidation index_validation = GetParam(); + const bool ordered = (index_validation == IndexValidation::kOrdered); + + const TensorShape kTensorShapes[] = {{3}, {4, 5}, {6, 7, 8}, {9, 10, 11, 12}}; + + for (const TensorShape& test_shape : kTensorShapes) { + Tensor indices, values, shape; + GenerateRandomSparseTensor(kNumNonZeros, test_shape, ordered, indices, + values, shape); + // Access tensor values. + auto indices_mat = indices.matrix(); + for (int test = 0; test < kNumTests; ++test) { + // Pick a random entry and dimension, and make the index out of bounds. + int64_t row = RandomPhilox().Uniform64(indices.dim_size(0)); + int64_t dim = RandomPhilox().Uniform64(indices.dim_size(1)); + int64_t old_val = indices_mat(row, dim); + + for (int64_t val : {static_cast(-1), test_shape.dim_size(dim)}) { + indices_mat(row, dim) = val; + Status indices_valid = ValidateSparseTensor( + indices, values, shape, index_validation); + if (index_validation == IndexValidation::kNone) { + TF_EXPECT_OK(indices_valid); + } else { + EXPECT_THAT( + indices_valid, + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Sparse index tuple .* is out of bounds"))) + << indices_mat; + } + } + + // Restore index for next test. 
+ indices_mat(row, dim) = old_val; + } + } +} + +TEST_P(ValidateSparseTensorTest, IndexOutOfOrderFailsForOrderedValidation) { + constexpr int kNumNonZeros = 1000; + constexpr int kNumTests = 100; + const TensorShape kTensorShapes[] = {{3}, {4, 5}, {6, 7, 8}, {9, 10, 11, 12}}; + const IndexValidation index_validation = GetParam(); + const bool ordered = (index_validation == IndexValidation::kOrdered); + + for (const TensorShape& test_shape : kTensorShapes) { + Tensor indices, values, shape; + GenerateRandomSparseTensor(kNumNonZeros, test_shape, ordered, indices, + values, shape); + // Access tensor values. + auto indices_mat = indices.matrix(); + const int64_t nnz = indices.dim_size(0); + const int64_t ndims = indices.dim_size(1); + for (int test = 0; test < kNumTests; ++test) { + // Pick two random index entries to swap. + int64_t row1 = RandomPhilox().Uniform64(nnz); + int64_t row2; + do { + row2 = RandomPhilox().Uniform64(nnz); + } while (row1 == row2); + for (int dim = 0; dim < ndims; ++dim) { + std::swap(indices_mat(row1, dim), indices_mat(row2, dim)); + } + + Status indices_valid = ValidateSparseTensor( + indices, values, shape, index_validation); + if (ordered) { + EXPECT_THAT( + indices_valid, + StatusIs(error::INVALID_ARGUMENT, + MatchesRegex("Sparse index tuple .* is out of order"))); + } else { + TF_EXPECT_OK(indices_valid); + } + + // Restore index for next test. + for (int dim = 0; dim < ndims; ++dim) { + std::swap(indices_mat(row1, dim), indices_mat(row2, dim)); + } + } + } +} + +INSTANTIATE_TEST_SUITE_P( + ValidateSparseTensorTestSuite, ValidateSparseTensorTest, + ::testing::Values(IndexValidation::kNone, IndexValidation::kOrdered, + IndexValidation::kUnordered), + [](const ::testing::TestParamInfo& + info) { + switch (info.param) { + case IndexValidation::kNone: + return "None"; + case IndexValidation::kUnordered: + return "Unordered"; + case IndexValidation::kOrdered: + return "Ordered"; + } + }); + } // namespace +} // namespace sparse_utils +} // namespace tensorflow diff --git a/tensorflow/core/kernels/summary_audio_op.cc b/tensorflow/core/kernels/summary_audio_op.cc index 09aab681f560aa..6015fe2ab5ebe4 100644 --- a/tensorflow/core/kernels/summary_audio_op.cc +++ b/tensorflow/core/kernels/summary_audio_op.cc @@ -49,6 +49,11 @@ class SummaryAudioOp : public OpKernel { float sample_rate = sample_rate_attr_; if (!has_sample_rate_attr_) { const Tensor& sample_rate_tensor = c->input(2); + OP_REQUIRES(c, + sample_rate_tensor.IsAligned() && + sample_rate_tensor.NumElements() == 1, + errors::InvalidArgument( + "sample_rate must be rank-0 or contain a single value")); sample_rate = sample_rate_tensor.scalar()(); } OP_REQUIRES(c, sample_rate > 0.0f, diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 63b63b9473e403..f9b0ec070488ff 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -2879,6 +2879,10 @@ REGISTER_OP("QuantizeAndDequantizeV2") axis); } else if (axis != -1) { ShapeHandle input; + if (axis >= kint32max) { + return errors::InvalidArgument( + "Axis cannot be >= kint32max value, got ", axis); + } TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; TF_RETURN_IF_ERROR( @@ -2914,6 +2918,10 @@ REGISTER_OP("QuantizeAndDequantizeV4") axis); } else if (axis != -1) { ShapeHandle input; + if (axis >= kint32max) { + return errors::InvalidArgument( + "Axis cannot be >= kint32max value, got ", axis); + } TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); 
DimensionHandle depth; TF_RETURN_IF_ERROR( @@ -2945,6 +2953,10 @@ REGISTER_OP("QuantizeAndDequantizeV4Grad") axis); } else if (axis != -1) { ShapeHandle input; + if (axis >= kint32max) { + return errors::InvalidArgument( + "Axis cannot be >= kint32max value, got ", axis); + } TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; TF_RETURN_IF_ERROR( @@ -2981,6 +2993,10 @@ REGISTER_OP("QuantizeAndDequantizeV3") axis); } else if (axis != -1) { ShapeHandle input; + if (axis >= kint32max) { + return errors::InvalidArgument( + "Axis cannot be >= kint32max value, got ", axis); + } TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input)); DimensionHandle depth; TF_RETURN_IF_ERROR( diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 3f94da01b73cdb..090bf10a2a5dd9 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1483,17 +1483,18 @@ Status RangeSize(const Tensor* start_t, const Tensor* limit_t, return errors::InvalidArgument("Requires delta != 0"); } - auto size = (std::is_integral::value - ? ((Eigen::numext::abs(limit - start) + - Eigen::numext::abs(delta) - T(1)) / - Eigen::numext::abs(delta)) - : (Eigen::numext::ceil( - Eigen::numext::abs((limit - start) / delta)))); - - // Undefined behaviour if size will not fit into int64_t - if (size > std::numeric_limits::max()) { - return errors::InvalidArgument("Requires ((limit - start) / delta) <= ", - std::numeric_limits::max()); + int64_t size; + if (std::is_integral::value) { + size = Eigen::divup(static_cast(Eigen::numext::abs(limit - start)), + static_cast(Eigen::numext::abs(delta))); + } else { + auto size_auto = + Eigen::numext::ceil(Eigen::numext::abs((limit - start) / delta)); + if (size_auto > std::numeric_limits::max()) { + return errors::InvalidArgument("Requires ((limit - start) / delta) <= ", + std::numeric_limits::max()); + } + size = static_cast(size_auto); } c->set_output(0, c->Vector(static_cast(size))); diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index e117fc0f41e96a..9171f946783b8a 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -60,6 +60,13 @@ Status FractionalPoolShapeFn(InferenceContext* c) { } } + for (std::size_t i = 0; i < pooling_ratio.size(); ++i) { + if (pooling_ratio[i] < 1) { + return errors::InvalidArgument( + "pooling_ratio cannot be smaller than 1, got: ", pooling_ratio[i]); + } + } + c->set_output(0, c->MakeShape(output_dims)); c->set_output(1, c->Vector(output_dims[1])); c->set_output(2, c->Vector(output_dims[2])); @@ -574,7 +581,7 @@ REGISTER_OP("FusedResizeAndPadConv2D") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .SetShapeFn([](InferenceContext* c) { - return CommonFusedConvCalculations(c, true /* has_resize */); + return CommonFusedConvCalculations(c, /*has_resize=*/true); }); REGISTER_OP("FusedPadConv2D") @@ -587,7 +594,7 @@ REGISTER_OP("FusedPadConv2D") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .SetShapeFn([](InferenceContext* c) { - return CommonFusedConvCalculations(c, false /* has_resize */); + return CommonFusedConvCalculations(c, /*has_resize=*/false); }); // -------------------------------------------------------------------------- diff --git a/tensorflow/core/ops/nn_ops_test.cc b/tensorflow/core/ops/nn_ops_test.cc index 469a9015a17c98..41940da69ef4c7 100644 --- a/tensorflow/core/ops/nn_ops_test.cc +++ b/tensorflow/core/ops/nn_ops_test.cc @@ -523,7 +523,8 @@ TEST(NNOpsTest, 
FractionalPool_ShapeFn) { .Finalize(&op.node_def)); }; - set_op(std::vector{2.0f, 1, 1 / 1.5f, 1 / 2.0f}); + // pooling_ratio must >= 1.0 + set_op(std::vector{2.0f, 1, 1.5f, 4.0f}); // Rank check. INFER_ERROR("must be rank 4", op, "[?,?,?]"); @@ -532,11 +533,11 @@ TEST(NNOpsTest, FractionalPool_ShapeFn) { INFER_OK(op, "?", "[?,?,?,?];[?];[?]"); INFER_OK(op, "[?,?,?,?]", "[?,?,?,?];[?];[?]"); - INFER_OK(op, "[10,20,30,40]", "[5,20,45,80];[20];[45]"); - INFER_OK(op, "[?,20,30,40]", "[?,20,45,80];[20];[45]"); - INFER_OK(op, "[10,?,30,40]", "[5,?,45,80];[?];[45]"); - INFER_OK(op, "[10,20,?,40]", "[5,20,?,80];[20];[?]"); - INFER_OK(op, "[10,20,30,?]", "[5,20,45,?];[20];[45]"); + INFER_OK(op, "[10,20,30,40]", "[5,20,20,10];[20];[20]"); + INFER_OK(op, "[?,20,30,40]", "[?,20,20,10];[20];[20]"); + INFER_OK(op, "[10,?,30,40]", "[5,?,20,10];[?];[20]"); + INFER_OK(op, "[10,20,?,40]", "[5,20,?,10];[20];[?]"); + INFER_OK(op, "[10,20,30,?]", "[5,20,20,?];[20];[20]"); // Wrong number of values for pooling_ratio. set_op(std::vector{.5, 1.0, 1.5}); diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index d72122ab47c600..2bba386af45a20 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -128,6 +128,16 @@ constexpr char kThrottleBucket[] = "GCS_THROTTLE_BUCKET_SIZE"; constexpr char kTokensPerRequest[] = "GCS_TOKENS_PER_REQUEST"; // The environment variable to configure the initial tokens (format: ) constexpr char kInitialTokens[] = "GCS_INITIAL_TOKENS"; +// The environment variable for GCS retry init_delay_time_us (format: ) +constexpr char kRetryConfigInitialDelayTimeUs[] = + "GCS_RETRY_CONFIG_INIT_DELAY_TIME_US"; +// The environment variable for GCS retry config max_delay_time_us (format: +// ) +constexpr char kRetryConfigMaxDelayTimeUs[] = + "GCS_RETRY_CONFIG_MAX_DELAY_TIME_US"; +// The environment variable that controls the number of retries in GCS +// exponential retries (format: ) +constexpr char kRetryConfigMaxRetries[] = "GCS_RETRY_CONFIG_MAX_RETRIES"; // The environment variable to customize which GCS bucket locations are allowed, // if the list is empty defaults to using the region of the zone (format, comma @@ -262,6 +272,38 @@ Status GetBoolValue(const Json::Value& parent, const char* name, bool* result) { return Status::OK(); } +/// Get GCS Retry Config by applying user overrides through env if any. +RetryConfig GetGcsRetryConfig() { + RetryConfig retryConfig( + /* init_delay_time_us = */ 1000 * 1000, + /* max_delay_time_us = */ 32 * 1000 * 1000, + /* max_retries = */ 10); + // Apply the overrides for Retry configs. + uint64 init_delay_time_us; + if (GetEnvVar(kRetryConfigInitialDelayTimeUs, strings::safe_strtou64, + &init_delay_time_us)) { + retryConfig.init_delay_time_us = init_delay_time_us; + } + + uint64 max_delay_time_us; + if (GetEnvVar(kRetryConfigMaxDelayTimeUs, strings::safe_strtou64, + &max_delay_time_us)) { + retryConfig.max_delay_time_us = max_delay_time_us; + } + + uint32 max_retries; + if (GetEnvVar(kRetryConfigMaxRetries, strings::safe_strtou32, &max_retries)) { + retryConfig.max_retries = max_retries; + } + + VLOG(1) << "GCS RetryConfig: " + << "init_delay_time_us = " << retryConfig.init_delay_time_us << " ; " + << "max_delay_time_us = " << retryConfig.max_delay_time_us << " ; " + << "max_retries = " << retryConfig.max_retries; + + return retryConfig; +} + /// A GCS-based implementation of a random access file with an LRU block cache. 
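// A minimal usage sketch for the retry overrides introduced above (assumes a
// POSIX environment where setenv is available; the values are illustrative
// only, the variable names come from the constants defined earlier in this
// file):
//
//   setenv("GCS_RETRY_CONFIG_INIT_DELAY_TIME_US", "500000", /*overwrite=*/1);
//   setenv("GCS_RETRY_CONFIG_MAX_DELAY_TIME_US", "16000000", /*overwrite=*/1);
//   setenv("GCS_RETRY_CONFIG_MAX_RETRIES", "5", /*overwrite=*/1);
//   const RetryConfig config = GetGcsRetryConfig();  // picks up the overrides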
class GcsRandomAccessFile : public RandomAccessFile { public: @@ -724,7 +766,7 @@ class GcsWritableFile : public WritableFile { GcsFileSystem::TimeoutConfig* timeouts_; std::function file_cache_erase_; bool sync_needed_; // whether there is buffered data that needs to be synced - RetryConfig retry_config_; + RetryConfig retry_config_ = GetGcsRetryConfig(); bool compose_append_; uint64 start_offset_; // Callbacks to the file system used to upload object into GCS. @@ -926,6 +968,8 @@ GcsFileSystem::GcsFileSystem(bool make_default_cache) { } else { compose_append_ = false; } + + retry_config_ = GetGcsRetryConfig(); } GcsFileSystem::GcsFileSystem( @@ -2126,6 +2170,10 @@ Status GcsFileSystem::CreateHttpRequest(std::unique_ptr* request) { return Status::OK(); } +RetryingGcsFileSystem::RetryingGcsFileSystem() + : RetryingFileSystem(std::make_unique(), + RetryConfig(GetGcsRetryConfig())) {} + } // namespace tensorflow // The TPU_GCS_FS option sets a TPU-on-GCS optimized file system that allows diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h index 47e1670bac859d..2ef42d4f714c34 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.h +++ b/tensorflow/core/platform/cloud/gcs_file_system.h @@ -447,9 +447,7 @@ class GcsFileSystem : public FileSystem { /// Google Cloud Storage implementation of a file system with retry on failures. class RetryingGcsFileSystem : public RetryingFileSystem { public: - RetryingGcsFileSystem() - : RetryingFileSystem(std::unique_ptr(new GcsFileSystem), - RetryConfig(100000 /* init_delay_time_us */)) {} + RetryingGcsFileSystem(); }; } // namespace tensorflow diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index c8bdb3188761aa..68ed41c9f52075 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -22,7 +22,7 @@ limitations under the License. // tensorflow/tools/pip_package/setup.py #define TF_MAJOR_VERSION 2 #define TF_MINOR_VERSION 9 -#define TF_PATCH_VERSION 0 +#define TF_PATCH_VERSION 3 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") diff --git a/tensorflow/core/util/bcast.h b/tensorflow/core/util/bcast.h index cbd7a6f8693e35..531c47aec3dab8 100644 --- a/tensorflow/core/util/bcast.h +++ b/tensorflow/core/util/bcast.h @@ -134,7 +134,7 @@ BCastList::BCastList(const BCastList::Vec (&x)[N], typedef BCastList::Vec Vec; // Safely multiplies dimensions taking into account symbolic shapes. - auto mul_dims = [](int64_t dim1, int64_t dim2) -> int64 { + auto mul_dims = [](int64_t dim1, int64_t dim2) -> int64_t { return dim1 != 0 && dim2 != 0 && (dim1 < 0 || dim2 < 0) ? 
-1 : dim1 * dim2; }; @@ -199,7 +199,7 @@ BCastList::BCastList(const BCastList::Vec (&x)[N], } Vec output; bool output_dim_set = false; - int output_dim = -1; + int64_t output_dim = -1; bool none_is_one = true; bool set_one = false; for (int j = 0; j < largest_rank; ++j) { diff --git a/tensorflow/core/util/bcast_test.cc b/tensorflow/core/util/bcast_test.cc index 60eb7859ec3f15..f33483a5846d67 100644 --- a/tensorflow/core/util/bcast_test.cc +++ b/tensorflow/core/util/bcast_test.cc @@ -375,6 +375,13 @@ TEST(BCastTest, Basic_Tensor_Scalar) { "[11,7,5,3,2]" "[11,7,5,3,2]" "[0,1,2,3,4][]"); + + // int32 edge-case: + EXPECT_EQ(BCast({1, 2147483648}, {1}), + "[2147483648][1][1][2147483648]" + "[2147483648]" + "[1,2147483648]" + "[0][0,1]"); } TEST(BCastTest, Basic_Tensor_With_DimSize_1_Scalar) { diff --git a/tensorflow/core/util/strided_slice_op.cc b/tensorflow/core/util/strided_slice_op.cc index 5342b26d859495..8bdfe1c85d5c92 100644 --- a/tensorflow/core/util/strided_slice_op.cc +++ b/tensorflow/core/util/strided_slice_op.cc @@ -79,6 +79,18 @@ struct StridedSliceDenseSpec { template static Status TF_MUST_USE_RESULT BuildDenseSpec( const StridedSliceSparseSpec& sparse, StridedSliceDenseSpec* dense) { + if (dense->dims < 0) { + return errors::InvalidArgument("Unexpected negative dense.dims: %d", + dense->dims); + } + + if (dense->dims >= 1024) { + // We do not expect to see tensors with rank >= 1024, it must mean that + // there is a bug somewhere. + return errors::InvalidArgument("Unexpected large dense.dims: %d", + dense->dims); + } + // Build expanded begin, end, strides, begin_mask, end_mask // to remove any ellipsis dense->begin.resize(dense->dims); @@ -176,6 +188,11 @@ Status ValidateStridedSliceOp( gtl::InlinedVector* begin, gtl::InlinedVector* end, gtl::InlinedVector* strides, StridedSliceShapeSpec* shape_spec) { + if (input_shape.unknown_rank()) { + // Note: If the rank is unknown, "input_shape.dims()" is -1. + return errors::InvalidArgument("Unexpected input_shape with unknown rank"); + } + const bool begin_is_wrong = begin_tensor != nullptr && !(TensorShapeUtils::IsVector(begin_tensor->shape()) && diff --git a/tensorflow/core/util/strided_slice_op.h b/tensorflow/core/util/strided_slice_op.h index 98928d4a81d70e..ccf8b0fddff9f9 100644 --- a/tensorflow/core/util/strided_slice_op.h +++ b/tensorflow/core/util/strided_slice_op.h @@ -53,6 +53,9 @@ struct StridedSliceShapeSpec { // are valid; , and other // output parameters will not be accurate. // +// If the rank of is unknown (i.e., "input_shape.unknown_rank()" +// is true)), the method returns an invalid status. +// // If or are nullptr, and will not be // valid. In this case, and will be true only if a // determination can be made based on the information given. 
A best effort is diff --git a/tensorflow/core/util/tensor_slice_writer.cc b/tensorflow/core/util/tensor_slice_writer.cc index a74e2a04cedbe5..731bce308fc9fb 100644 --- a/tensorflow/core/util/tensor_slice_writer.cc +++ b/tensorflow/core/util/tensor_slice_writer.cc @@ -131,6 +131,16 @@ Status TensorSliceWriter::Finish() { /* static */ size_t TensorSliceWriter::MaxBytesPerElement(DataType dt) { + size_t max_bytes_per_element = + TensorSliceWriter::MaxBytesPerElementOrZero(dt); + if (max_bytes_per_element == 0) { + LOG(FATAL) << "MaxBytesPerElement not implemented for dtype: " << dt; + } + return max_bytes_per_element; +} + +/* static */ +size_t TensorSliceWriter::MaxBytesPerElementOrZero(DataType dt) { switch (dt) { case DT_FLOAT: return 4; @@ -170,9 +180,8 @@ size_t TensorSliceWriter::MaxBytesPerElement(DataType dt) { case DT_STRING: case DT_BFLOAT16: default: - LOG(FATAL) << "MaxBytesPerElement not implemented for dtype: " << dt; + return 0; } - return 0; } template <> diff --git a/tensorflow/core/util/tensor_slice_writer.h b/tensorflow/core/util/tensor_slice_writer.h index 01f2e62dfbd2bc..9aa51c29cb323d 100644 --- a/tensorflow/core/util/tensor_slice_writer.h +++ b/tensorflow/core/util/tensor_slice_writer.h @@ -68,6 +68,8 @@ class TensorSliceWriter { static size_t MaxBytesPerElement(DataType dt); private: + static size_t MaxBytesPerElementOrZero(DataType dt); + static constexpr size_t kMaxMessageBytes = 1LL << 31; // Filling in the TensorProto in a SavedSlice will add the following // header bytes, in addition to the data: @@ -162,9 +164,15 @@ Status TensorSliceWriter::Add(const string& name, const TensorShape& shape, template Status TensorSliceWriter::SaveData(const T* data, int64_t num_elements, SavedSlice* ss) { - size_t size_bound = - ss->ByteSize() + kTensorProtoHeaderBytes + - (MaxBytesPerElement(DataTypeToEnum::value) * num_elements); + size_t max_bytes_per_element = + MaxBytesPerElementOrZero(DataTypeToEnum::value); + if (max_bytes_per_element == 0) { + return errors::InvalidArgument( + "Tensor slice serialization not implemented for dtype ", + DataTypeToEnum::value); + } + size_t size_bound = ss->ByteSize() + kTensorProtoHeaderBytes + + (max_bytes_per_element * num_elements); if (size_bound > kMaxMessageBytes) { return errors::InvalidArgument( "Tensor slice is too large to serialize (conservative estimate: ", diff --git a/tensorflow/core/util/tensor_slice_writer_test.cc b/tensorflow/core/util/tensor_slice_writer_test.cc index c9740730a66a26..5120be5a8b5e35 100644 --- a/tensorflow/core/util/tensor_slice_writer_test.cc +++ b/tensorflow/core/util/tensor_slice_writer_test.cc @@ -15,17 +15,19 @@ limitations under the License. 
#include "tensorflow/core/util/tensor_slice_writer.h" +#include #include +#include +#include #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/lib/core/stringpiece.h" -#include "tensorflow/core/lib/io/path.h" -#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/path.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/error_codes.pb.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/saved_tensor_slice_util.h" #include "tensorflow/core/util/tensor_slice_reader.h" @@ -362,6 +364,17 @@ TEST(TensorSliceWriteTest, SizeErrors) { } } +TEST(TensorSliceWriterTest, InvalidInput) { + SavedSlice ss; + std::array data; + std::fill(data.begin(), data.end(), 1234); + Status s = TensorSliceWriter::SaveData(data.data(), data.size(), &ss); + EXPECT_EQ(s.code(), error::INVALID_ARGUMENT); + EXPECT_TRUE(absl::StrContains( + s.error_message(), + "Tensor slice serialization not implemented for dtype")); +} + } // namespace checkpoint } // namespace tensorflow diff --git a/tensorflow/lite/core/api/BUILD b/tensorflow/lite/core/api/BUILD index 3f1bbca7054299..6135eaf41dd798 100644 --- a/tensorflow/lite/core/api/BUILD +++ b/tensorflow/lite/core/api/BUILD @@ -88,6 +88,7 @@ cc_library( copts = tflite_copts(), visibility = op_resolver_internal_visibility_allowlist() + [ "//tensorflow/lite:__pkg__", + "//tensorflow/lite/java/src/main/native:__pkg__", ], deps = [":op_resolver"], ) diff --git a/tensorflow/lite/kernels/gather_nd.cc b/tensorflow/lite/kernels/gather_nd.cc index c323b16a161f0b..f615028112923f 100644 --- a/tensorflow/lite/kernels/gather_nd.cc +++ b/tensorflow/lite/kernels/gather_nd.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include +#include "tensorflow/lite/c/c_api_types.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" #include "tensorflow/lite/kernels/internal/reference/reference_ops.h" @@ -102,13 +103,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } template -TfLiteStatus GatherNd(const TfLiteTensor* params, const TfLiteTensor* indices, - TfLiteTensor* output) { - reference_ops::GatherNd( +TfLiteStatus GatherNd(TfLiteContext* context, const TfLiteTensor* params, + const TfLiteTensor* indices, TfLiteTensor* output) { + const TfLiteStatus status = reference_ops::GatherNd( GetTensorShape(params), GetTensorData(params), GetTensorShape(indices), GetTensorData(indices), GetTensorShape(output), GetTensorData(output)); - return kTfLiteOk; + if (status != kTfLiteOk) { + TF_LITE_KERNEL_LOG(context, "gather_nd index out of bounds"); + } + return status; } template @@ -136,17 +140,17 @@ TfLiteStatus EvalGatherNd(TfLiteContext* context, const TfLiteTensor* params, switch (params->type) { case kTfLiteFloat32: - return GatherNd(params, indices, output); + return GatherNd(context, params, indices, output); case kTfLiteUInt8: - return GatherNd(params, indices, output); + return GatherNd(context, params, indices, output); case kTfLiteInt8: - return GatherNd(params, indices, output); + return GatherNd(context, params, indices, output); case kTfLiteInt16: - return GatherNd(params, indices, output); + return GatherNd(context, params, indices, output); case kTfLiteInt32: - return GatherNd(params, indices, output); + return GatherNd(context, params, indices, output); case kTfLiteInt64: - return GatherNd(params, indices, output); + return GatherNd(context, params, indices, output); case kTfLiteString: return GatherNdString(params, indices, output); default: diff --git a/tensorflow/lite/kernels/gather_nd_test.cc b/tensorflow/lite/kernels/gather_nd_test.cc index 39130a2d9b66e9..1f4c19be0faaf2 100644 --- a/tensorflow/lite/kernels/gather_nd_test.cc +++ b/tensorflow/lite/kernels/gather_nd_test.cc @@ -73,6 +73,22 @@ TEST(GatherNdOpTest, ElementIndexingIntoMatrix) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({1.1, 2.2})); } +TEST(GatherNdOpTest, ErrorOnOutOfBoundsTooLarge) { + GatherNdOpModel m({TensorType_FLOAT32, {2, 2}}, {TensorType_INT32, {2, 2}}); + m.SetInput({1.1, 1.2, 2.1, 2.2}); + m.SetPositions({0, 0, 2, 0}); + EXPECT_EQ(m.Invoke(), kTfLiteError); + m.SetPositions({0, 0, 1, 2}); + EXPECT_EQ(m.Invoke(), kTfLiteError); +} + +TEST(GatherNdOpTest, ErrorOnOutOfBoundsNegative) { + GatherNdOpModel m({TensorType_FLOAT32, {2, 2}}, {TensorType_INT32, {2, 2}}); + m.SetInput({1.1, 1.2, 2.1, 2.2}); + m.SetPositions({1, -1, 1, 1}); + EXPECT_EQ(m.Invoke(), kTfLiteError); +} + TEST(GatherNdOpTest, SliceIndexingIntoMatrix) { GatherNdOpModel m({TensorType_FLOAT32, {2, 2}}, {TensorType_INT32, {2, 1}}); m.SetInput({1.1, 1.2, 2.1, 2.2}); diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 975aa0b4705971..52591ad706da88 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -1058,7 +1058,6 @@ cc_test( srcs = [ "strided_slice_logic_test.cc", ], - shard_count = 4, deps = [ ":strided_slice_logic", "@com_google_googletest//:gtest_main", diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h index 
500f2454da8067..162d16d44848c7 100644 --- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h @@ -5000,108 +5000,6 @@ inline void Slice(const tflite::SliceParams& op_params, return Slice(op_params, input_shape, output_shape, &writer); } -// Note: This implementation is only optimized for the case where the inner -// stride == 1. -template -inline void StridedSlice(const tflite::StridedSliceParams& op_params, - const RuntimeShape& unextended_input_shape, - const RuntimeShape& unextended_output_shape, - SequentialTensorWriter* writer) { - using strided_slice::LoopCondition; - using strided_slice::StartForAxis; - using strided_slice::StopForAxis; - - ruy::profiler::ScopeLabel label("StridedSlice"); - - // Note that the output_shape is not used herein. - tflite::StridedSliceParams params_copy = op_params; - - TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 5); - TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 5); - const RuntimeShape input_shape = - RuntimeShape::ExtendedShape(5, unextended_input_shape); - const RuntimeShape output_shape = - RuntimeShape::ExtendedShape(5, unextended_output_shape); - - // Reverse and pad to 5 dimensions because that is what the runtime code - // requires (ie. all shapes must be 5D and are given backwards). - strided_slice::StridedSlicePadIndices(¶ms_copy, 5); - - const int start_0 = StartForAxis(params_copy, input_shape, 0); - const int stop_0 = StopForAxis(params_copy, input_shape, 0, start_0); - const int start_1 = StartForAxis(params_copy, input_shape, 1); - const int stop_1 = StopForAxis(params_copy, input_shape, 1, start_1); - const int start_2 = StartForAxis(params_copy, input_shape, 2); - const int stop_2 = StopForAxis(params_copy, input_shape, 2, start_2); - const int start_3 = StartForAxis(params_copy, input_shape, 3); - const int stop_3 = StopForAxis(params_copy, input_shape, 3, start_3); - const int start_4 = StartForAxis(params_copy, input_shape, 4); - const int stop_4 = StopForAxis(params_copy, input_shape, 4, start_4); - const bool inner_stride_is_1 = params_copy.strides[4] == 1; - - for (int offset_0 = start_0 * input_shape.Dims(1), - end_0 = stop_0 * input_shape.Dims(1), - step_0 = params_copy.strides[0] * input_shape.Dims(1); - !LoopCondition(offset_0, end_0, params_copy.strides[0]); - offset_0 += step_0) { - for (int offset_1 = (offset_0 + start_1) * input_shape.Dims(2), - end_1 = (offset_0 + stop_1) * input_shape.Dims(2), - step_1 = params_copy.strides[1] * input_shape.Dims(2); - !LoopCondition(offset_1, end_1, params_copy.strides[1]); - offset_1 += step_1) { - for (int offset_2 = (offset_1 + start_2) * input_shape.Dims(3), - end_2 = (offset_1 + stop_2) * input_shape.Dims(3), - step_2 = params_copy.strides[2] * input_shape.Dims(3); - !LoopCondition(offset_2, end_2, params_copy.strides[2]); - offset_2 += step_2) { - for (int offset_3 = (offset_2 + start_3) * input_shape.Dims(4), - end_3 = (offset_2 + stop_3) * input_shape.Dims(4), - step_3 = params_copy.strides[3] * input_shape.Dims(4); - !LoopCondition(offset_3, end_3, params_copy.strides[3]); - offset_3 += step_3) { - // When the stride is 1, the inner loop is equivalent to the - // optimized slice inner loop. Otherwise, it is identical to the - // strided_slice reference implementation inner loop. 
- if (inner_stride_is_1) { - const int len = stop_4 - start_4; - if (len > 0) { - writer->WriteN(offset_3 + start_4, len); - } - } else { - for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4; - !LoopCondition(offset_4, end_4, params_copy.strides[4]); - offset_4 += params_copy.strides[4]) { - writer->Write(offset_4); - } - } - } - } - } - } -} - -template -inline void StridedSlice(const tflite::StridedSliceParams& op_params, - const RuntimeShape& unextended_input_shape, - const T* input_data, - const RuntimeShape& unextended_output_shape, - T* output_data) { - SequentialTensorWriter writer(input_data, output_data); - StridedSlice(op_params, unextended_input_shape, unextended_output_shape, - &writer); -} - -template -inline void StridedSlice(const tflite::StridedSliceParams& op_params, - const RuntimeShape& unextended_input_shape, - const TfLiteTensor* input, - const RuntimeShape& unextended_output_shape, - TfLiteTensor* output) { - SequentialTensorWriter writer(input, output); - StridedSlice(op_params, unextended_input_shape, unextended_output_shape, - &writer); -} - template void Minimum(const RuntimeShape& input1_shape, const T* input1_data, const T* input2_data, const RuntimeShape& output_shape, diff --git a/tensorflow/lite/kernels/internal/reference/conv3d_transpose.h b/tensorflow/lite/kernels/internal/reference/conv3d_transpose.h index d0e2ef3026e4a2..322b3c594555e3 100644 --- a/tensorflow/lite/kernels/internal/reference/conv3d_transpose.h +++ b/tensorflow/lite/kernels/internal/reference/conv3d_transpose.h @@ -111,14 +111,13 @@ inline void Conv3DTranspose( if (bias_data) { const int outer_size = batches * output_depth * output_height * output_width; - const int num_channels = input_shape.Dims(4); for (int n = 0; n < outer_size; ++n) { for (int c = 0; c < output_num_channels; ++c) { data_ptr[c] = ActivationFunctionWithMinMax(data_ptr[c] + bias_data[c], float_activation_min, float_activation_max); } - data_ptr += num_channels; + data_ptr += output_num_channels; } } else { const int flat_size = output_shape.FlatSize(); diff --git a/tensorflow/lite/kernels/internal/reference/reference_ops.h b/tensorflow/lite/kernels/internal/reference/reference_ops.h index ba4f3cc7d0a737..ba8107e8f3f4e4 100644 --- a/tensorflow/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/lite/kernels/internal/reference/reference_ops.h @@ -29,6 +29,7 @@ limitations under the License. #include "third_party/eigen3/Eigen/Core" #include "fixedpoint/fixedpoint.h" #include "ruy/profiler/instrumentation.h" // from @ruy +#include "tensorflow/lite/c/c_api_types.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" #include "tensorflow/lite/kernels/internal/quantization_util.h" @@ -581,23 +582,31 @@ inline GatherNdHelperResult GatherNdHelper(const RuntimeShape& params_shape, return ret; } +// Implements GatherNd. +// Returns an error if any of the indices_data would cause an out of bounds +// memory read. 
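// For example (mirroring the new gather_nd tests), with a params_shape of
// [2, 2] an index tuple such as {2, 0}, or one with a negative component such
// as {1, -1}, maps to a flattened offset outside the valid range
// [0, params_shape.FlatSize() - slice_size], so kTfLiteError is returned
// instead of performing the out-of-bounds memcpy.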
template -inline void GatherNd(const RuntimeShape& params_shape, - const ParamsT* params_data, - const RuntimeShape& indices_shape, - const IndicesT* indices_data, - const RuntimeShape& output_shape, ParamsT* output_data) { +inline TfLiteStatus GatherNd(const RuntimeShape& params_shape, + const ParamsT* params_data, + const RuntimeShape& indices_shape, + const IndicesT* indices_data, + const RuntimeShape& output_shape, + ParamsT* output_data) { ruy::profiler::ScopeLabel label("GatherNd"); const GatherNdHelperResult res = GatherNdHelper(params_shape, indices_shape); for (int i = 0; i < res.n_slices; ++i) { - int from_pos = 0; + int64_t from_pos = 0; for (int j = 0; j < res.indices_nd; ++j) { from_pos += indices_data[i * res.indices_nd + j] * res.dims_to_count[j]; } + if (from_pos < 0 || from_pos + res.slice_size > params_shape.FlatSize()) { + return kTfLiteError; + } std::memcpy(output_data + i * res.slice_size, params_data + from_pos, sizeof(ParamsT) * res.slice_size); } + return kTfLiteOk; } #ifndef TF_LITE_STATIC_MEMORY @@ -626,11 +635,12 @@ inline void GatherNdString(const RuntimeShape& params_shape, #endif template -inline void ScatterNd(const RuntimeShape& indices_shape, - const IndicesT* indices_data, - const RuntimeShape& updates_shape, - const UpdatesT* updates_data, - const RuntimeShape& output_shape, UpdatesT* output_data) { +inline TfLiteStatus ScatterNd(const RuntimeShape& indices_shape, + const IndicesT* indices_data, + const RuntimeShape& updates_shape, + const UpdatesT* updates_data, + const RuntimeShape& output_shape, + UpdatesT* output_data) { ruy::profiler::ScopeLabel label("ScatterNd"); int n_slices = 1; @@ -653,18 +663,24 @@ inline void ScatterNd(const RuntimeShape& indices_shape, remain_flat_size = dims_to_count[i]; } + if (n_slices * slice_size > updates_shape.FlatSize()) { + return kTfLiteError; + } memset(output_data, 0, sizeof(UpdatesT) * output_flat_size); for (int i = 0; i < n_slices; ++i) { int to_pos = 0; for (int j = 0; j < indices_nd; ++j) { IndicesT idx = indices_data[i * indices_nd + j]; - TFLITE_DCHECK(0 <= idx && idx < output_shape.Dims(j)); to_pos += idx * dims_to_count[j]; } + if (to_pos < 0 || to_pos + slice_size > output_flat_size) { + return kTfLiteError; + } for (int j = 0; j < slice_size; j++) { output_data[to_pos + j] += updates_data[i * slice_size + j]; } } + return kTfLiteOk; } template diff --git a/tensorflow/lite/kernels/internal/reference/strided_slice.h b/tensorflow/lite/kernels/internal/reference/strided_slice.h index 40dc2e91022015..ff367cf95f19b6 100644 --- a/tensorflow/lite/kernels/internal/reference/strided_slice.h +++ b/tensorflow/lite/kernels/internal/reference/strided_slice.h @@ -31,10 +31,6 @@ inline void StridedSlice(const tflite::StridedSliceParams& op_params, const RuntimeShape& unextended_input_shape, const RuntimeShape& unextended_output_shape, SequentialTensorWriter* writer) { - using strided_slice::LoopCondition; - using strided_slice::StartForAxis; - using strided_slice::StopForAxis; - ruy::profiler::ScopeLabel label("StridedSlice"); // Note that the output_shape is not used herein. @@ -51,41 +47,67 @@ inline void StridedSlice(const tflite::StridedSliceParams& op_params, // requires (ie. all shapes must be 5D and are given backwards). 
strided_slice::StridedSlicePadIndices(¶ms_copy, 5); - const int start_0 = StartForAxis(params_copy, input_shape, 0); - const int stop_0 = StopForAxis(params_copy, input_shape, 0, start_0); - const int start_1 = StartForAxis(params_copy, input_shape, 1); - const int stop_1 = StopForAxis(params_copy, input_shape, 1, start_1); - const int start_2 = StartForAxis(params_copy, input_shape, 2); - const int stop_2 = StopForAxis(params_copy, input_shape, 2, start_2); - const int start_3 = StartForAxis(params_copy, input_shape, 3); - const int stop_3 = StopForAxis(params_copy, input_shape, 3, start_3); - const int start_4 = StartForAxis(params_copy, input_shape, 4); - const int stop_4 = StopForAxis(params_copy, input_shape, 4, start_4); - - for (int offset_0 = start_0 * input_shape.Dims(1), - end_0 = stop_0 * input_shape.Dims(1), - step_0 = params_copy.strides[0] * input_shape.Dims(1); - !LoopCondition(offset_0, end_0, params_copy.strides[0]); - offset_0 += step_0) { - for (int offset_1 = (offset_0 + start_1) * input_shape.Dims(2), - end_1 = (offset_0 + stop_1) * input_shape.Dims(2), - step_1 = params_copy.strides[1] * input_shape.Dims(2); - !LoopCondition(offset_1, end_1, params_copy.strides[1]); - offset_1 += step_1) { - for (int offset_2 = (offset_1 + start_2) * input_shape.Dims(3), - end_2 = (offset_1 + stop_2) * input_shape.Dims(3), - step_2 = params_copy.strides[2] * input_shape.Dims(3); - !LoopCondition(offset_2, end_2, params_copy.strides[2]); - offset_2 += step_2) { - for (int offset_3 = (offset_2 + start_3) * input_shape.Dims(4), - end_3 = (offset_2 + stop_3) * input_shape.Dims(4), - step_3 = params_copy.strides[3] * input_shape.Dims(4); - !LoopCondition(offset_3, end_3, params_copy.strides[3]); - offset_3 += step_3) { - for (int offset_4 = offset_3 + start_4, end_4 = offset_3 + stop_4; - !LoopCondition(offset_4, end_4, params_copy.strides[4]); - offset_4 += params_copy.strides[4]) { - writer->Write(offset_4); + const int start_0 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 0); + const int stop_0 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 0, start_0); + const int start_1 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 1); + const int stop_1 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 1, start_1); + const int start_2 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 2); + const int stop_2 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 2, start_2); + const int start_3 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 3); + const int stop_3 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 3, start_3); + const int start_4 = + strided_slice::StridedSliceStartForAxis(params_copy, input_shape, 4); + const int stop_4 = strided_slice::StridedSliceEndForAxis( + params_copy, input_shape, 4, start_4); + + auto lc = [&](int end, int stride, int index) { + if (stride < 0) { + return index > end; + } else { + return index < end; + } + }; + const int* shape = input_shape.DimsData(); + const int* stride = params_copy.strides; + const bool inner_stride_is_1 = params_copy.strides[4] == 1; + + for (int offset_0 = start_0; lc(stop_0, stride[0], offset_0); + offset_0 += stride[0]) { + for (int offset_1 = start_1; lc(stop_1, stride[1], offset_1); + offset_1 += stride[1]) { + for (int offset_2 = start_2; lc(stop_2, stride[2], offset_2); + offset_2 += stride[2]) { + for (int offset_3 = start_3; lc(stop_3, stride[3], offset_3); + offset_3 
+= stride[3]) { + // When the stride is 1, the inner loop is equivalent to the + // optimized slice inner loop. Otherwise, it is identical to the + // strided_slice reference implementation inner loop. + if (inner_stride_is_1) { + const int len = stop_4 - start_4; + int index = start_4 + offset_3 * shape[4] + + offset_2 * shape[3] * shape[4] + + offset_1 * shape[2] * shape[3] * shape[4] + + offset_0 * shape[1] * shape[2] * shape[3] * shape[4]; + if (len > 0) { + writer->WriteN(index, len); + } + } else { + for (int offset_4 = start_4; lc(stop_4, stride[4], offset_4); + offset_4 += stride[4]) { + int index = offset_4 + offset_3 * shape[4] + + offset_2 * shape[3] * shape[4] + + offset_1 * shape[2] * shape[3] * shape[4] + + offset_0 * shape[1] * shape[2] * shape[3] * shape[4]; + writer->Write(index); + } } } } diff --git a/tensorflow/lite/kernels/internal/strided_slice_logic.h b/tensorflow/lite/kernels/internal/strided_slice_logic.h index bfe84050dca156..2efdcf26fe07a4 100644 --- a/tensorflow/lite/kernels/internal/strided_slice_logic.h +++ b/tensorflow/lite/kernels/internal/strided_slice_logic.h @@ -69,6 +69,69 @@ inline void StridedSlicePadIndices(tflite::StridedSliceParams* p, p->strides_count = dim_count; } +// Return the index for the first element along that axis. This index will be a +// positive integer between [0, axis_size] (or [-1, axis_size -1] if stride < 0) +// that can be used to index directly into the data. +inline int StridedSliceStartForAxis(const tflite::StridedSliceParams& params, + const RuntimeShape& input_shape, + int32_t axis) { + const int32_t axis_size = input_shape.Dims(axis); + int32_t start = params.start_indices[axis]; + const int32_t stride = params.strides[axis]; + const int32_t begin_mask = (params.begin_mask & 1 << axis); + if (start < 0) { + start += axis_size; + } + if (stride > 0) { + start = Clamp(start, 0, axis_size); + } else { + start = Clamp(start, -1, axis_size - 1); + } + if (begin_mask) { + if (stride > 0) { + start = 0; + } else { + start = axis_size - 1; + } + } + return start; +} + +inline int StridedSliceEndForAxis(const tflite::StridedSliceParams& params, + const RuntimeShape& input_shape, int axis, + int start) { + const auto shrink_axis_mask = params.shrink_axis_mask; + const bool shrink_axis = shrink_axis_mask & (1 << axis); + const int axis_size = input_shape.Dims(axis); + if (shrink_axis) { + if (start >= axis_size) { + return start; + } else { + return start + 1; + } + } + const auto* indices = params.stop_indices; + int end = indices[axis]; + const int32_t stride = params.strides[axis]; + const int32_t end_mask = (params.end_mask & 1 << axis); + if (end < 0) { + end += axis_size; + } + if (stride > 0) { + end = Clamp(end, 0, axis_size); + } else { + end = Clamp(end, -1, axis_size - 1); + } + if (end_mask) { + if (stride > 0) { + end = axis_size; + } else { + end = -1; + } + } + return end; +} + // Return the index for the first element along that axis. This index will be a // positive integer between [0, axis_size] (or [-1, axis_size -1] if stride < 0) // that can be used to index directly into the data. 
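The two helpers added above clamp the begin/end index of each axis into the valid range before any element is touched, which is what the rewritten reference StridedSlice relies on to stay in bounds. A minimal sketch of the clamping behaviour (illustrative only; it mirrors the unit tests in the next file and assumes the usual TFLite include paths):

#include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
#include "tensorflow/lite/kernels/internal/types.h"

// Out-of-range begin/end indices are clamped to [0, axis_size] for a positive
// stride, so the generated loop can never index past the end of the buffer.
void StridedSliceClampSketch() {
  tflite::StridedSliceParams params{};
  params.start_indices[0] = -11;  // far below the start of a size-10 axis
  params.stop_indices[0] = 99;    // far past the end of the axis
  params.strides[0] = 1;
  const tflite::RuntimeShape shape({10});
  const int start = tflite::strided_slice::StridedSliceStartForAxis(
      params, shape, /*axis=*/0);  // clamped to 0
  const int stop = tflite::strided_slice::StridedSliceEndForAxis(
      params, shape, /*axis=*/0, start);  // clamped to 10
  (void)start;
  (void)stop;
}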
diff --git a/tensorflow/lite/kernels/internal/strided_slice_logic_test.cc b/tensorflow/lite/kernels/internal/strided_slice_logic_test.cc index 628e72698917c5..494d07690a2106 100644 --- a/tensorflow/lite/kernels/internal/strided_slice_logic_test.cc +++ b/tensorflow/lite/kernels/internal/strided_slice_logic_test.cc @@ -76,5 +76,119 @@ TEST(RunStridedSlicePadIndices, Pad3) { ); } +TEST(StridedSliceStartForAxis, NegativeOOBIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = -11; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 0); +} + +TEST(StridedSliceStartForAxis, NegativeOneTheBoundaryIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = -10; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 0); +} + +TEST(StridedSliceStartForAxis, NegativeWithinBoundsIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = -9; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 1); +} + +TEST(StridedSliceStartForAxis, MinusOneIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = -1; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 9); +} + +TEST(StridedSliceStartForAxis, ZeroIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = 0; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 0); +} + +TEST(StridedSliceStartForAxis, OneIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = 1; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 1); +} + +TEST(StridedSliceStartForAxis, PositiveBoundaryIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = 9; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 9); +} + +TEST(StridedSliceStartForAxis, PositiveOOBIndexSizeofArray) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = 10; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 10); +} + +TEST(StridedSliceStartForAxis, PositiveOOBIndex) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = 11; + params.strides[0] = 1; + int start = strided_slice::StridedSliceStartForAxis( + params, RuntimeShape({10}), /*axis=*/0); + EXPECT_EQ(start, 10); +} + +TEST(StridedSliceStartForAxis, TenFourMinus1) { + StridedSliceParams params{}; + params.begin_mask = 0; + params.end_mask = 0; + params.start_indices[0] = 5; + params.stop_indices[0] = 2; + params.strides[0] = -1; + int start = strided_slice::StridedSliceStartForAxis(params, RuntimeShape({4}), + /*axis=*/0); + int stop = 
strided_slice::StridedSliceEndForAxis(params, RuntimeShape({4}), + /*axis=*/0, start); + EXPECT_EQ(start, 3); + EXPECT_EQ(stop, 2); +} + } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/scatter_nd.cc b/tensorflow/lite/kernels/scatter_nd.cc index 93e2fe36c3fb26..144c07a2da7b4f 100644 --- a/tensorflow/lite/kernels/scatter_nd.cc +++ b/tensorflow/lite/kernels/scatter_nd.cc @@ -128,11 +128,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { template TfLiteStatus ScatterNd(const TfLiteTensor* indices, const TfLiteTensor* updates, TfLiteTensor* output) { - reference_ops::ScatterNd( + return reference_ops::ScatterNd( GetTensorShape(indices), GetTensorData(indices), GetTensorShape(updates), GetTensorData(updates), GetTensorShape(output), GetTensorData(output)); - return kTfLiteOk; } template @@ -148,23 +147,36 @@ TfLiteStatus EvalScatterNd(TfLiteContext* context, const TfLiteTensor* indices, ResizeOutputTensor(context, shape, output)); } + TfLiteStatus status = kTfLiteError; switch (updates->type) { case kTfLiteFloat32: - return ScatterNd(indices, updates, output); + status = ScatterNd(indices, updates, output); + break; case kTfLiteUInt8: - return ScatterNd(indices, updates, output); + status = ScatterNd(indices, updates, output); + break; + case kTfLiteBool: + status = ScatterNd(indices, updates, output); + break; case kTfLiteInt8: - return ScatterNd(indices, updates, output); + status = ScatterNd(indices, updates, output); + break; case kTfLiteInt32: - return ScatterNd(indices, updates, output); + status = ScatterNd(indices, updates, output); + break; case kTfLiteInt64: - return ScatterNd(indices, updates, output); + status = ScatterNd(indices, updates, output); + break; default: context->ReportError( context, "Updates of type '%s' are not supported by scatter_nd.", TfLiteTypeGetName(updates->type)); return kTfLiteError; } + if (status != kTfLiteOk) { + context->ReportError(context, "scatter_nd index out of bounds"); + } + return status; } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { diff --git a/tensorflow/lite/kernels/scatter_nd_test.cc b/tensorflow/lite/kernels/scatter_nd_test.cc index bba796bd5137db..43e35bdc106a76 100644 --- a/tensorflow/lite/kernels/scatter_nd_test.cc +++ b/tensorflow/lite/kernels/scatter_nd_test.cc @@ -347,5 +347,34 @@ TEST(ScatterNdOpTest, DynamicShape) { /*2, 3*/ 1, 2, 3, 4, 5})); } +TEST(ScatterNdOpTest, ReadAndWriteArrayLimits) { + ScatterNdOpModel m({TensorType_INT32, {5, 1}}, {TensorType_INT32, {5}}, + {TensorType_INT32, {1}}); + m.SetIndices({4, 3, 1, 0, 2}); + m.SetUpdates({1, 2, 3, 7, 9}); + m.SetShape({5}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({5})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({7, 3, 9, 2, 1})); +} + +TEST(ScatterNdOpTest, OOBRead) { + ScatterNdOpModel m({TensorType_INT32, {1, 1}}, {TensorType_INT32, {1}}, + {TensorType_INT32, {1}}); + m.SetIndices({4}); + m.SetUpdates({1}); + m.SetShape({1}); + ASSERT_EQ(m.Invoke(), kTfLiteError); +} + +TEST(ScatterNdOpTest, OOBWrites) { + ScatterNdOpModel m({TensorType_INT32, {5, 1}}, {TensorType_INT32, {5}}, + {TensorType_INT32, {1}}); + m.SetIndices({4, 3, 1, -0x38, 0x38}); + m.SetUpdates({1, 2, 3, 0x44444444, 0x55555555}); + m.SetShape({1}); + ASSERT_EQ(m.Invoke(), kTfLiteError); +} + } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/strided_slice.cc b/tensorflow/lite/kernels/strided_slice.cc index 55aecc9276531e..f6f5d584610b27 100644 --- 
a/tensorflow/lite/kernels/strided_slice.cc +++ b/tensorflow/lite/kernels/strided_slice.cc @@ -24,7 +24,6 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/compatibility.h" -#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" #include "tensorflow/lite/kernels/internal/strided_slice_logic.h" #include "tensorflow/lite/kernels/internal/tensor.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" @@ -70,7 +69,7 @@ struct StridedSliceContext { }; StridedSliceParams BuildStridedSliceParams(StridedSliceContext* op_context) { - StridedSliceParams op_params; + StridedSliceParams op_params{}; // The ellipsis_mask and new_axis_mask in op_params are not used. Those masks // are processed here to update begin_mask, end_mask and the index range. @@ -196,9 +195,9 @@ TfLiteStatus ResizeOutputTensor(TfLiteContext* context, int32_t stride = op_params.strides[idx]; TF_LITE_ENSURE_MSG(context, stride != 0, "stride value has to be non-zero"); - int32_t begin = ::tflite::strided_slice::StartForAxis( + int32_t begin = ::tflite::strided_slice::StridedSliceStartForAxis( op_params, effective_input_shape, idx); - int32_t end = ::tflite::strided_slice::StopForAxis( + int32_t end = ::tflite::strided_slice::StridedSliceEndForAxis( op_params, effective_input_shape, idx, begin); // When shrinking an axis, the end position does not matter (and can be @@ -272,43 +271,46 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } StridedSliceParams op_params = BuildStridedSliceParams(&op_context); -#define TF_LITE_STRIDED_SLICE(data_type) \ - { \ - if (kernel_type == kGenericOptimized) { \ - optimized_ops::StridedSlice( \ - op_params, op_context.effective_input_shape, op_context.input, \ - GetTensorShape(op_context.output), op_context.output); \ - } else { \ - reference_ops::StridedSlice( \ - op_params, op_context.effective_input_shape, op_context.input, \ - GetTensorShape(op_context.output), op_context.output); \ - } \ - } - switch (op_context.input->type) { case kTfLiteFloat32: - TF_LITE_STRIDED_SLICE(float); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; case kTfLiteInt32: - TF_LITE_STRIDED_SLICE(int32_t); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; case kTfLiteInt64: - TF_LITE_STRIDED_SLICE(int64_t); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; case kTfLiteUInt8: - TF_LITE_STRIDED_SLICE(uint8_t); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; case kTfLiteInt8: - TF_LITE_STRIDED_SLICE(int8_t); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; case kTfLiteInt16: - TF_LITE_STRIDED_SLICE(int16_t); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; case kTfLiteBool: - TF_LITE_STRIDED_SLICE(bool); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + 
GetTensorShape(op_context.output), op_context.output); break; case kTfLiteString: - TF_LITE_STRIDED_SLICE(string); + reference_ops::StridedSlice( + op_params, op_context.effective_input_shape, op_context.input, + GetTensorShape(op_context.output), op_context.output); break; default: TF_LITE_KERNEL_LOG(context, diff --git a/tensorflow/lite/kernels/strided_slice_test.cc b/tensorflow/lite/kernels/strided_slice_test.cc index 2dd5777f07d602..c4c5805a4eaf21 100644 --- a/tensorflow/lite/kernels/strided_slice_test.cc +++ b/tensorflow/lite/kernels/strided_slice_test.cc @@ -26,6 +26,7 @@ namespace tflite { namespace { using ::testing::ElementsAreArray; +using ::testing::IsEmpty; template class StridedSliceOpModel : public SingleOpModel { @@ -35,7 +36,7 @@ class StridedSliceOpModel : public SingleOpModel { std::initializer_list end_shape, std::initializer_list strides_shape, int begin_mask, int end_mask, int ellipsis_mask, int new_axis_mask, - int shrink_axis_mask) { + int shrink_axis_mask, bool use_simple_allocator = true) { input_ = AddInput(GetTensorType()); begin_ = AddInput(TensorType_INT32); end_ = AddInput(TensorType_INT32); @@ -46,7 +47,8 @@ class StridedSliceOpModel : public SingleOpModel { CreateStridedSliceOptions(builder_, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask) .Union()); - BuildInterpreter({input_shape, begin_shape, end_shape, strides_shape}); + BuildInterpreter({input_shape, begin_shape, end_shape, strides_shape}, + use_simple_allocator); } void SetInput(std::initializer_list data) { @@ -669,7 +671,7 @@ TYPED_TEST(StridedSliceOpTest, In3D_SmallBeginWithhrinkAxis1) { EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6})); } -TYPED_TEST(StridedSliceOpTest, In3D_BackwardSmallBegin) { +TYPED_TEST(StridedSliceOpTest, In3D_BackwardSmallBeginEndMask) { StridedSliceOpModel m({1, 1, 2}, {1}, {1}, {1}, 0, 1, 0, 0, 0); m.SetInput({1, 2}); m.SetBegin({1}); @@ -679,6 +681,16 @@ TYPED_TEST(StridedSliceOpTest, In3D_BackwardSmallBegin) { EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({0, 1, 2})); } +TYPED_TEST(StridedSliceOpTest, In3D_BackwardSmallBegin) { + StridedSliceOpModel m({1, 1, 2}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + m.SetInput({1, 2}); + m.SetBegin({1}); + m.SetEnd({0}); + m.SetStrides({1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({0, 1, 2})); +} + TYPED_TEST(StridedSliceOpTest, In3D_Backward) { StridedSliceOpModel m({1, 1, 2}, {3}, {3}, {3}, 6, 7, 0, 0, 0); m.SetInput({1, 2}); @@ -853,5 +865,86 @@ TYPED_TEST(StridedSliceOpTest, NoInfiniteLoop) { ASSERT_EQ(m.InvokeUnchecked(), kTfLiteOk); } +TYPED_TEST(StridedSliceOpTest, MinusThreeMinusFourMinusOne) { + StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + m.SetInput({1, 2, 3, 4}); + m.SetBegin({-3}); + m.SetEnd({-4}); + m.SetStrides({-1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({2})); +} + +TYPED_TEST(StridedSliceOpTest, MinusFourMinusThreeOne) { + StridedSliceOpModel m({4}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + m.SetInput({1, 2, 3, 4}); + m.SetBegin({-4}); + m.SetEnd({-3}); + m.SetStrides({1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({1})); +} + +TYPED_TEST(StridedSliceOpTest, OneOneOne) { + StridedSliceOpModel m({1}, {1}, {1}, {1}, 0, 0, 0, 0, 0); + m.SetInput({2}); + m.SetBegin({1}); + m.SetEnd({1}); + m.SetStrides({1}); + ASSERT_EQ(m.Invoke(), 
kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({0})); +} + +TYPED_TEST(StridedSliceOpTest, OneOneOneShrinkAxis) { + StridedSliceOpModel m({3}, {1}, {1}, {1}, 0, 0, 0, 0, 1); + m.SetInput({1, 2, 3}); + m.SetBegin({1}); + m.SetEnd({1}); + m.SetStrides({1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), IsEmpty()); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({2})); +} + +TYPED_TEST(StridedSliceOpTest, OneOneOneShrinkAxisOOB) { + StridedSliceOpModel m({1}, {1}, {1}, {1}, 0, 0, 0, 0, 1); + m.SetInput({2}); + m.SetBegin({1}); + m.SetEnd({1}); + m.SetStrides({1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), IsEmpty()); +} + +TYPED_TEST(StridedSliceOpTest, OutOfBounds) { + StridedSliceOpModel m({1}, {1}, {1}, {1}, 0, 0, 0, 0, 1); + m.SetBegin({1}); + m.SetEnd({2}); + m.SetStrides({1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), IsEmpty()); +} + +TYPED_TEST(StridedSliceOpTest, StrideOutOfBounds) { + StridedSliceOpModel m({1}, {1}, {1}, {1}, 0, 0, 0, 0, 1); + m.SetBegin({1}); + m.SetEnd({4}); + m.SetStrides({7}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), IsEmpty()); +} + +TYPED_TEST(StridedSliceOpTest, NegEndMask) { + StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 0, 0b10, 0, 0, 0); + m.SetInput({1, 2, 3, 4, 5, 6}); + m.SetBegin({0, -1}); + m.SetEnd({2, -3}); + m.SetStrides({1, -1}); + ASSERT_EQ(m.Invoke(), kTfLiteOk); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 3})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 2, 1, 6, 5, 4})); +} } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/test_util.cc b/tensorflow/lite/kernels/test_util.cc index 0e97b89bca4d33..4084193ab48eb1 100644 --- a/tensorflow/lite/kernels/test_util.cc +++ b/tensorflow/lite/kernels/test_util.cc @@ -177,7 +177,8 @@ void SingleOpModel::BuildInterpreter(std::vector> input_shapes, int num_threads, bool allow_fp32_relax_to_fp16, bool apply_delegate, - bool allocate_and_delegate) { + bool allocate_and_delegate, + bool use_simple_allocator) { input_shapes_ = input_shapes; allow_fp32_relax_to_fp16_ = allow_fp32_relax_to_fp16; apply_delegate_ = apply_delegate; @@ -202,7 +203,7 @@ void SingleOpModel::BuildInterpreter(std::vector> input_shapes, uint8_t* buffer_pointer = builder_.GetBufferPointer(); UpdateOpVersion(buffer_pointer); - bool use_simple_allocator = + use_simple_allocator |= tflite::KernelTestDelegateProviders::Get()->ConstParams().Get( tflite::KernelTestDelegateProviders::kUseSimpleAllocator); @@ -289,11 +290,12 @@ void SingleOpModel::Invoke() { ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk); } TfLiteStatus SingleOpModel::InvokeUnchecked() { return interpreter_->Invoke(); } -void SingleOpModel::BuildInterpreter( - std::vector> input_shapes) { +void SingleOpModel::BuildInterpreter(std::vector> input_shapes, + bool use_simple_allocator) { BuildInterpreter(input_shapes, /*num_threads=*/-1, /*allow_fp32_relax_to_fp16=*/false, - /*apply_delegate=*/true, /*allocate_and_delegate=*/true); + /*apply_delegate=*/true, /*allocate_and_delegate=*/true, + use_simple_allocator); } // static diff --git a/tensorflow/lite/kernels/test_util.h b/tensorflow/lite/kernels/test_util.h index cf61f32a9c97b5..4f3aa2fe27e329 100644 --- a/tensorflow/lite/kernels/test_util.h +++ b/tensorflow/lite/kernels/test_util.h @@ -511,9 +511,11 @@ class SingleOpModel { // `apply_delegate` is ignored. 
void BuildInterpreter(std::vector> input_shapes, int num_threads, bool allow_fp32_relax_to_fp16, - bool apply_delegate, bool allocate_and_delegate = true); + bool apply_delegate, bool allocate_and_delegate = true, + bool use_simple_allocator = false); - void BuildInterpreter(std::vector> input_shapes); + void BuildInterpreter(std::vector> input_shapes, + bool use_simple_allocator = false); // Executes inference, asserting success. void Invoke(); diff --git a/tensorflow/lite/python/interpreter_wrapper/numpy.cc b/tensorflow/lite/python/interpreter_wrapper/numpy.cc index 3269e6a177c439..5c93cef04d1ec9 100644 --- a/tensorflow/lite/python/interpreter_wrapper/numpy.cc +++ b/tensorflow/lite/python/interpreter_wrapper/numpy.cc @@ -164,6 +164,13 @@ bool FillStringBufferFromPyString(PyObject* value, bool FillStringBufferWithPyArray(PyObject* value, DynamicBuffer* dynamic_buffer) { + if (!PyArray_Check(value)) { + PyErr_Format(PyExc_ValueError, + "Passed in value type is not a numpy array, got type %s.", + value->ob_type->tp_name); + return false; + } + PyArrayObject* array = reinterpret_cast(value); switch (PyArray_TYPE(array)) { case NPY_OBJECT: diff --git a/tensorflow/lite/python/lite_v2_test.py b/tensorflow/lite/python/lite_v2_test.py index c56359090a8a61..35eceb00ef60d9 100644 --- a/tensorflow/lite/python/lite_v2_test.py +++ b/tensorflow/lite/python/lite_v2_test.py @@ -134,6 +134,35 @@ def testScalarInput(self): actual_value = self._evaluateTFLiteModel(tflite_model, [input_data]) self.assertEqual(expected_value.numpy(), actual_value) + @test_util.run_v2_only + def testStringInput(self): + + class Model(tf.Module): + + @tf.function + def __call__(self, x): + return x + + root = Model() + concrete_func = root.__call__.get_concrete_function( + tf.constant([str(x) for x in range(11)])) + # Convert model. + converter = lite.TFLiteConverterV2.from_concrete_functions([concrete_func], + root) + tflite_model = converter.convert() + input_data = tf.constant([str(x) for x in range(11)], + shape=(11,), + dtype=tf.dtypes.string) + # Check values from converted model. + interpreter = tf.lite.Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + my_signature = interpreter.get_signature_runner() + + with self.assertRaises(ValueError) as error: + _ = my_signature(x=input_data) + self.assertIn('Passed in value type is not a numpy array, got type ', + str(error.exception)) + @test_util.run_v2_only def testModelWithoutInputs(self): diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD index 3ca73fc36bbb7e..be1075dea1e190 100644 --- a/tensorflow/python/data/experimental/kernel_tests/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/BUILD @@ -211,6 +211,7 @@ tf_py_test( size = "small", srcs = ["group_by_reducer_test.py"], shard_count = 12, + tags = ["no_oss"], # TODO(b/258503209): Disable the test. deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index a12f13097eb122..7f0a98980df703 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -70,6 +70,7 @@ tf_py_test( srcs = ["cache_test.py"], tags = [ "notsan", # TODO(b/206452257): re-enable after flakiness resolved. 
+ "no_oss", ], deps = [ ":checkpoint_test_base", @@ -258,6 +259,7 @@ tf_py_test( srcs = ["flat_map_test.py"], grpc_enabled = True, shard_count = 8, + tags = ["no_oss"], deps = [ ":checkpoint_test_base", ":test_base", @@ -539,6 +541,7 @@ tf_py_test( size = "small", srcs = ["map_test.py"], shard_count = 19, + tags = ["no_oss"], # TODO(b/258503209): Disable the test. deps = [ ":checkpoint_test_base", ":test_base", @@ -890,6 +893,7 @@ tf_py_test( "no_tsan", # TODO(b/191433147): reenable "no_windows", # TODO(b/182379890) "notap", # TODO(b/192359227) + "no_oss", # TODO(b/258503209): Disable the test. ], deps = [ ":checkpoint_test_base", diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 53e75e976c5c8d..6c15e089d2a9cd 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -256,6 +256,13 @@ PARSE_VALUE(ParseFloatValue, float, PyFloat_Check, PyFloat_AsDouble) #if PY_MAJOR_VERSION < 3 bool ParseInt64Value(const string& key, PyObject* py_value, TF_Status* status, int64_t* value) { + if (py_value == nullptr) { + TF_SetStatus(status, TF_INVALID_ARGUMENT, + tensorflow::strings::StrCat( + "Expecting int or long value for attr ", key, ".")) + .c_str(); + return false; + } if (PyInt_Check(py_value)) { *value = static_cast(PyInt_AsLong(py_value)); return true; @@ -397,11 +404,24 @@ bool SetOpAttrList(TFE_Context* ctx, TFE_Op* op, const char* key, const int num_values = PySequence_Size(py_list); if (attr_list_sizes != nullptr) (*attr_list_sizes)[key] = num_values; -#define PARSE_LIST(c_type, parse_fn) \ - std::unique_ptr values(new c_type[num_values]); \ - for (int i = 0; i < num_values; ++i) { \ - tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i)); \ - if (!parse_fn(key, py_value.get(), status, &values[i])) return false; \ +#define SEQUENCE_ITEM_NULL_CHECK(c_type, item) \ + if (!item) { \ + TF_SetStatus(status, TF_INVALID_ARGUMENT, \ + tensorflow::strings::StrCat( \ + "Expecting sequence of " #c_type " for attr ", key, \ + ", got ", py_list->ob_type->tp_name) \ + .c_str()); \ + return false; \ + } + +#define PARSE_LIST(c_type, parse_fn) \ + std::unique_ptr values(new c_type[num_values]); \ + for (int i = 0; i < num_values; ++i) { \ + tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i)); \ + SEQUENCE_ITEM_NULL_CHECK(c_type, py_value); \ + if (!parse_fn(key, py_value.get(), status, &values[i])) { \ + return false; \ + } \ } if (type == TF_ATTR_STRING) { @@ -410,6 +430,7 @@ bool SetOpAttrList(TFE_Context* ctx, TFE_Op* op, const char* key, for (int i = 0; i < num_values; ++i) { tensorflow::StringPiece value; tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i)); + SEQUENCE_ITEM_NULL_CHECK(string, py_value); if (!ParseStringValue(key, py_value.get(), status, &value)) return false; values[i] = value.data(); lengths[i] = value.size(); diff --git a/tensorflow/python/eager/tensor_test.py b/tensorflow/python/eager/tensor_test.py index 66f06af97c0cab..2f8c3bc07aa3af 100644 --- a/tensorflow/python/eager/tensor_test.py +++ b/tensorflow/python/eager/tensor_test.py @@ -490,6 +490,24 @@ def testEagerTensorFormatForVariant(self): self.assertEqual( f"{t!r}", ">") + def testNumpyTooManyDimensions(self): + t = constant_op.constant(1., shape=[1] * 33) + with self.assertRaisesRegex( + errors.InvalidArgumentError, + "Cannot convert tensor with 33 dimensions to NumPy array. 
NumPy arrays " + "can have at most 32 dimensions"): + t.numpy() + + def testNumpyDimsTooBig(self): + # Creating a Numpy array fails in some cases if the product of non-zero + # dimensions is very big, even if the shape also has a zero in it. + t = array_ops.ones((0, 2**31, 2**31)) + with self.assertRaisesRegex( + errors.InvalidArgumentError, + r"Failed to create numpy array from tensor of shape " + r"\[0, 2147483648, 2147483648\]. Numpy error.*array is too big"): + t.numpy() + class TFETensorUtilTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/keras/utils/BUILD b/tensorflow/python/keras/utils/BUILD index fa442c8e8a2216..54b88089a2d3b2 100644 --- a/tensorflow/python/keras/utils/BUILD +++ b/tensorflow/python/keras/utils/BUILD @@ -412,6 +412,7 @@ tf_py_test( size = "small", srcs = ["conv_utils_test.py"], python_version = "PY3", + tags = ["no_oss"], # TODO(b/258503209): Disable the test. deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python/keras", diff --git a/tensorflow/python/kernel_tests/array_ops/BUILD b/tensorflow/python/kernel_tests/array_ops/BUILD index 0751231a545100..fd5364636e6e59 100644 --- a/tensorflow/python/kernel_tests/array_ops/BUILD +++ b/tensorflow/python/kernel_tests/array_ops/BUILD @@ -19,6 +19,7 @@ cuda_py_test( tags = [ "noasan", # times out "optonly", # times out + "no_oss", # TODO(b/258503209): Disable the test. ], deps = [ "//tensorflow/python:array_ops", @@ -140,7 +141,10 @@ cuda_py_test( name = "concat_op_test", size = "medium", srcs = ["concat_op_test.py"], - tags = ["no_windows"], # b/126916429 + tags = [ + "no_windows", # b/126916429 + "no_oss", # TODO(b/258503209): Disable the test. + ], xla_tags = [ "no_cuda_asan", # times out ], @@ -560,6 +564,7 @@ cuda_py_test( name = "slice_op_test", size = "medium", srcs = ["slice_op_test.py"], + tags = ["no_oss"], # TODO(b/258503209): Disable the test. deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -609,6 +614,9 @@ cuda_py_test( name = "split_op_test", size = "medium", srcs = ["split_op_test.py"], + tags = [ + "no_oss", # TODO(b/258503209): Disable the test. + ], deps = [ "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/array_ops/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops/array_ops_test.py index cabd4bddc22b9a..f54c92c5e21d61 100644 --- a/tensorflow/python/kernel_tests/array_ops/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops/array_ops_test.py @@ -351,6 +351,15 @@ def testExpandDimsWithNonScalarDim(self): "must be a tensor with a single value"): array_ops.expand_dims(1, axis=[0, 1]) + def testReshapeWithManyDims(self): + with self.assertRaisesRegex(errors.InvalidArgumentError, + "too many dimensions"): + self.evaluate( + array_ops.reshape( + tensor=[[1]], + shape=constant_op.constant([1 for i in range(254)], + dtype=dtypes.int64))) + @test_util.with_eager_op_as_function class ReverseV2Test(test_util.TensorFlowTestCase): @@ -1544,6 +1553,21 @@ def testEager(self): [[0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 2, 3, 0, 0], [0, 0, 4, 5, 6, 0, 0], [0, 0, 0, 0, 0, 0, 0]]) + # b/246325518: Bad shape size. Explicitly testing different execution paths. + def testInvalidMirrorPadGradEagerMode(self): + with context.eager_mode(): + with self.assertRaises(Exception): + gen_array_ops.MirrorPadGrad( + input=[1], paddings=[[0x77f00000, 0xa000000]], mode="REFLECT") + + # b/246325518: Bad shape size. Explicitly testing different execution paths. 
+ def testInvalidMirrorPadGradGraphMode(self): + with context.graph_mode(): + with self.assertRaises(Exception): + result = gen_array_ops.MirrorPadGrad( + input=[1], paddings=[[0x77f00000, 0xa000000]], mode="REFLECT") + self.evaluate(result) + def testSymmetricMirrorPadGrad(self): t = np.broadcast_to(np.arange(0, 7), (3, 2, 1, 7)) paddings = constant_op.constant([ @@ -1767,6 +1791,72 @@ def testOutOfBoundAxis(self): max_range=input_max, axis=2**31 - 1)) + @test_util.run_v2_only + def testInvalidAxis(self): + + @def_function.function + def test_quantize_and_dequantize_v2(): + gen_array_ops.quantize_and_dequantize_v2( + input=[2.5], + input_min=[1.0], + input_max=[10.0], + signed_input=True, + num_bits=1, + range_given=True, + round_mode="HALF_TO_EVEN", + narrow_range=True, + axis=0x7fffffff) + + @def_function.function + def test_quantize_and_dequantize_v3(): + gen_array_ops.quantize_and_dequantize_v3( + input=[2.5], + input_min=[1.0], + input_max=[10.0], + num_bits=1, + signed_input=True, + range_given=True, + narrow_range=True, + axis=0x7fffffff) + + @def_function.function + def test_quantize_and_dequantize_v4(): + gen_array_ops.quantize_and_dequantize_v4( + input=[2.5], + input_min=[1.0], + input_max=[10.0], + signed_input=True, + num_bits=1, + range_given=True, + round_mode="HALF_TO_EVEN", + narrow_range=True, + axis=0x7fffffff) + + @def_function.function + def test_quantize_and_dequantize_v4_grad(): + gen_array_ops.quantize_and_dequantize_v4_grad( + gradients=[2.5], + input=[2.5], + input_min=[1.0], + input_max=[10.0], + axis=0x7fffffff) + + with self.assertRaisesRegex( + ValueError, "Axis cannot be >= kint32max value, got 2147483647"): + test_quantize_and_dequantize_v2() + + with self.assertRaisesRegex( + ValueError, "Axis cannot be >= kint32max value, got 2147483647"): + test_quantize_and_dequantize_v3() + + with self.assertRaisesRegex( + ValueError, "Axis cannot be >= kint32max value, got 2147483647"): + test_quantize_and_dequantize_v4() + + with self.assertRaisesRegex( + ValueError, "Axis cannot be >= kint32max value, got 2147483647"): + test_quantize_and_dequantize_v4_grad() + @test_util.run_all_in_graph_and_eager_modes class SortedSearchTest(test_util.TensorFlowTestCase): @@ -1987,6 +2077,17 @@ def testZeroValueSize(self): side=side, out_type=dtype), array_ops.zeros([2, 0], dtype)) + def testZeroInputSize(self): + dtype = dtypes.int32 + for side in ("left", "right"): + with self.subTest(side=side): + self.assertAllEqual( + array_ops.searchsorted( + array_ops.ones([2, 0]), + array_ops.ones([2, 3]), + side=side, + out_type=dtype), array_ops.zeros([2, 3], dtype)) + def testInt64(self): @def_function.function diff --git a/tensorflow/python/kernel_tests/array_ops/edit_distance_op_test.py b/tensorflow/python/kernel_tests/array_ops/edit_distance_op_test.py index b74024aa60c8b6..c3720efa6bd305 100644 --- a/tensorflow/python/kernel_tests/array_ops/edit_distance_op_test.py +++ b/tensorflow/python/kernel_tests/array_ops/edit_distance_op_test.py @@ -209,8 +209,8 @@ def testEditDistanceZeroLengthHypothesisAndTruth(self): def testEditDistanceBadIndices(self): hypothesis_indices = np.full((3, 3), -1250999896764, dtype=np.int64) - hypothesis_values = np.empty(3, dtype=np.int64) - hypothesis_shape = np.empty(3, dtype=np.int64) + hypothesis_values = np.zeros(3, dtype=np.int64) + hypothesis_shape = np.zeros(3, dtype=np.int64) truth_indices = np.full((3, 3), -1250999896764, dtype=np.int64) truth_values = np.full([3], 2, dtype=np.int64) truth_shape = np.full([3], 2, dtype=np.int64) diff --git 
a/tensorflow/python/kernel_tests/composite_tensor_ops_test.py b/tensorflow/python/kernel_tests/composite_tensor_ops_test.py index e5e9d1ef9bf6d9..4bce5c624d2ea9 100644 --- a/tensorflow/python/kernel_tests/composite_tensor_ops_test.py +++ b/tensorflow/python/kernel_tests/composite_tensor_ops_test.py @@ -18,11 +18,14 @@ from tensorflow.python.eager import backprop from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util from tensorflow.python.ops import composite_tensor_ops +from tensorflow.python.ops import gen_composite_tensor_ops +from tensorflow.python.ops import gen_list_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops @@ -83,6 +86,30 @@ def testEncodingErrors(self, value, spec, message): with self.assertRaisesRegex(ValueError, message): composite_tensor_ops.composite_tensor_to_variants(value(), spec) + def testDecodingEmptyNonScalarTensorError(self): + if not context.executing_eagerly(): + # Creating a variant tensor of an empty list is not allowed in eager mode. + return + + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'must not be an empty variant tensor'): + gen_composite_tensor_ops.CompositeTensorVariantToComponents( + encoded=constant_op.constant([], dtype=dtypes.variant), + metadata='', + Tcomponents=[dtypes.int32]) + + def testDecodingInvalidEncodedInputError(self): + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'not a valid CompositeTensorVariant tensor'): + self.evaluate( + gen_composite_tensor_ops.CompositeTensorVariantToComponents( + encoded=gen_list_ops.EmptyTensorList( + element_dtype=dtypes.int32, + element_shape=[1, 2], + max_num_elements=2), + metadata='', + Tcomponents=[dtypes.int32])) + def testRoundTripThroughTensorProto(self): value = ragged_factory_ops.constant([[1, 2], [3], [4, 5, 6]]) encoded = composite_tensor_ops.composite_tensor_to_variants(value) diff --git a/tensorflow/python/kernel_tests/control_flow/BUILD b/tensorflow/python/kernel_tests/control_flow/BUILD index e419fccb7fc345..946a3ef56df96b 100644 --- a/tensorflow/python/kernel_tests/control_flow/BUILD +++ b/tensorflow/python/kernel_tests/control_flow/BUILD @@ -181,6 +181,7 @@ cuda_py_test( name = "scan_ops_test", size = "medium", srcs = ["scan_ops_test.py"], + tags = ["no_oss"], # TODO(b/258503209): Disable the test. deps = [ "//tensorflow/python:client_testlib", "//tensorflow/python:errors", diff --git a/tensorflow/python/kernel_tests/data_structures/list_ops_test.py b/tensorflow/python/kernel_tests/data_structures/list_ops_test.py index bc3e2f3c79083b..c95c27eadc0fbd 100644 --- a/tensorflow/python/kernel_tests/data_structures/list_ops_test.py +++ b/tensorflow/python/kernel_tests/data_structures/list_ops_test.py @@ -94,6 +94,16 @@ def testPopFromEmptyTensorListFails(self, max_num_elements): l = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) self.evaluate(l) + def testTensorListReserveWithNonScalarNumElements(self): + # list_kernels.cc in tf/core/kernels raises InvalidArgumentError, and + # tf_ops_n_z.cc in tf/compiler/mlir/tf/ir raises UnknownError. 
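For contrast with the failing call below, a valid reserve passes a scalar num_elements; this sketch mirrors testPopUninitializedTensorUseListElementShape elsewhere in this file and is not part of the patch:

# valid: element_shape is at most rank 1 and num_elements is a scalar
l = list_ops.tensor_list_reserve(
    element_dtype=dtypes.float32, element_shape=[2, 3], num_elements=3)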
+ with self.assertRaises((errors.InvalidArgumentError, errors.UnknownError)): + l = list_ops.tensor_list_reserve( + element_dtype=dtypes.float32, + element_shape=[2, 3], + num_elements=constant_op.constant([1, 1])) + self.evaluate(l) + def testPopUninitializedTensorUseListElementShape(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=[2, 3], num_elements=3) @@ -481,6 +491,30 @@ def testScatterOutputListSizeWithNumElementsSpecified(self): # TensorListScatter should return a list with size num_elements. self.assertAllEqual(list_ops.tensor_list_length(l), 5) + def testScatterFailsWhenElementShapeIsNotVector(self): + c0 = constant_op.constant([1.0, 2.0]) + # In Eager mode, InvalidArgumentError is generated by the Compute function. + # In graph mode, ValueError is generated by the shape function. + with self.assertRaisesRegex( + (errors.InvalidArgumentError, ValueError), + "must be at most rank 1"): + l = gen_list_ops.tensor_list_scatter( + # Wrong element_shape. Should be at most rank 1. + c0, [1, 3], element_shape=[[1]]) + self.evaluate(l) + + def testScatterV2FailsWhenElementShapeIsNotVector(self): + c0 = constant_op.constant([1.0, 2.0]) + # In Eager mode, InvalidArgumentError is generated by the Compute function. + # In graph mode, ValueError is generated by the shape function. + with self.assertRaisesRegex( + (errors.InvalidArgumentError, ValueError), + "must be at most rank 1"): + l = gen_list_ops.tensor_list_scatter_v2( + # Wrong element_shape. Should be at most rank 1. + c0, [1, 3], element_shape=[[1]], num_elements=2) + self.evaluate(l) + def testScatterFailsWhenIndexLargerThanNumElements(self): c0 = constant_op.constant([1.0, 2.0]) with self.assertRaisesRegex( @@ -515,6 +549,17 @@ def testScatterWithNegativeIndicesFails(self): l = list_ops.tensor_list_scatter(c0, [-1, -2], element_shape=[]) self.evaluate(l) + @test_util.run_in_graph_and_eager_modes + def testScatterWithNonScalarFails(self): + c = constant_op.constant(value=[2]) + num_elements = np.array([[], [], []], dtype=np.float32) + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r"Shape must be rank 0 but is rank \d+|" + r"\w+ must be a scalar"): + self.evaluate( + gen_list_ops.TensorListScatterV2( + tensor=c, indices=c, element_shape=c, num_elements=num_elements)) + def testScatterIntoExistingList(self): l = list_ops.tensor_list_reserve( element_dtype=dtypes.float32, element_shape=[], num_elements=3) @@ -560,6 +605,17 @@ def testTensorListFromTensor(self): self.assertAllEqual(e, 1.0) self.assertAllEqual(list_ops.tensor_list_length(l), 0) + def testTensorListFromTensorFailsWhenElementShapeIsNotVector(self): + t = constant_op.constant([1.0, 2.0]) + # In Eager mode, InvalidArgumentError is generated by the Compute function. + # In graph mode, ValueError is generated by the shape function. + with self.assertRaisesRegex( + (errors.InvalidArgumentError, ValueError), + "must be at most rank 1"): + # Wrong element_shape. Should be at most rank 1. 
+ l = list_ops.tensor_list_from_tensor(t, element_shape=[[1]]) + self.evaluate(l) + @test_util.run_gpu_only def testFromTensorGPU(self): with context.device("gpu:0"): @@ -1458,6 +1514,24 @@ def testConcatWithUninitializedTensorsFailsIfNoInputLengths(self): t = list_ops.tensor_list_concat(l, element_dtype=dtypes.float32) self.evaluate(t) + @test_util.run_in_graph_and_eager_modes + def testConcatWithInvalidElementShape(self): + l = list_ops.tensor_list_reserve( + element_dtype=dtypes.float32, element_shape=[], num_elements=0) + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r"element_shape must not be empty"): + self.evaluate(gen_list_ops.tensor_list_concat( + input_handle=l, element_dtype=dtypes.float32, element_shape=[])) + + def testEmptyTensorListInvalidShape(self): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r"Shape must be at most rank 1 but is rank 2"): + t = gen_list_ops.EmptyTensorList( + element_shape=array_ops.ones(dtype=dtypes.int32, shape=[1, 0]), + max_num_elements=constant_op.constant(1), + element_dtype=dtypes.int32) + self.evaluate(t) + def testEvenSplit(self): def RunTest(input_tensor, lengths, expected_stacked_output): @@ -1604,6 +1678,15 @@ def testResizeWithInvalidSizeFails(self): l = list_ops.tensor_list_resize(l, -1) self.evaluate(l) + @test_util.run_in_graph_and_eager_modes + def testResizeWithNonScalarFails(self): + l = list_ops.tensor_list_from_tensor([3, 4, 5], element_shape=[]) + size = np.zeros([0, 2, 3, 3]) + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r"Shape must be rank 0 but is rank \d+|" + r"\w+ must be a scalar"): + self.evaluate(gen_list_ops.TensorListResize(input_handle=l, size=size)) + @test_util.run_deprecated_v1 @test_util.enable_control_flow_v2 def testSkipEagerResizeGrad(self): diff --git a/tensorflow/python/kernel_tests/image_ops/BUILD b/tensorflow/python/kernel_tests/image_ops/BUILD index 96de1e11c8efe4..4d46cf8b5a1dc7 100644 --- a/tensorflow/python/kernel_tests/image_ops/BUILD +++ b/tensorflow/python/kernel_tests/image_ops/BUILD @@ -102,7 +102,7 @@ tf_py_test( ], ) -tf_py_test( +cuda_py_test( name = "draw_bounding_box_op_test", size = "small", srcs = ["draw_bounding_box_op_test.py"], diff --git a/tensorflow/python/kernel_tests/image_ops/draw_bounding_box_op_test.py b/tensorflow/python/kernel_tests/image_ops/draw_bounding_box_op_test.py index 5b695e861136b8..a66d8d8a9a2a13 100644 --- a/tensorflow/python/kernel_tests/image_ops/draw_bounding_box_op_test.py +++ b/tensorflow/python/kernel_tests/image_ops/draw_bounding_box_op_test.py @@ -16,8 +16,11 @@ import numpy as np +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import image_ops from tensorflow.python.ops import image_ops_impl @@ -50,11 +53,16 @@ def _fillBorder(self, image, color): image[height - 1, 0:width, 0:depth] = color return image - def _testDrawBoundingBoxColorCycling(self, img, colors=None): + def _testDrawBoundingBoxColorCycling(self, + img, + dtype=dtypes.float32, + colors=None): """Tests if cycling works appropriately. Args: img: 3-D numpy image on which to draw. + dtype: image dtype (float, half). + colors: color table. 
""" color_table = colors if colors is None: @@ -82,7 +90,7 @@ def _testDrawBoundingBoxColorCycling(self, img, colors=None): bboxes = math_ops.cast(bboxes, dtypes.float32) bboxes = array_ops.expand_dims(bboxes, 0) image = ops.convert_to_tensor(image) - image = image_ops_impl.convert_image_dtype(image, dtypes.float32) + image = image_ops_impl.convert_image_dtype(image, dtype) image = array_ops.expand_dims(image, 0) image = image_ops.draw_bounding_boxes(image, bboxes, colors=colors) with self.cached_session(use_gpu=False) as sess: @@ -118,6 +126,30 @@ def testDrawBoundingBoxRGBAColorCyclingWithColors(self): [0, 0, 0.5, 1]]) self._testDrawBoundingBoxColorCycling(image, colors=colors) + def testDrawBoundingBoxHalf(self): + """Test if RGBA color cycling works correctly with provided colors.""" + image = np.zeros([10, 10, 4], "float32") + colors = np.asarray([[0.5, 0, 0.5, 1], [0.5, 0.5, 0, 1], [0.5, 0, 0, 1], + [0, 0, 0.5, 1]]) + self._testDrawBoundingBoxColorCycling( + image, dtype=dtypes.half, colors=colors) + + # generate_bound_box_proposals is only available on GPU. + @test_util.run_gpu_only() + def testGenerateBoundingBoxProposals(self): + # Op only exists on GPU. + with self.cached_session(use_gpu=True): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 4"): + scores = constant_op.constant( + value=[[[[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]]]) + self.evaluate( + image_ops.generate_bounding_box_proposals( + scores=scores, + bbox_deltas=[], + image_info=[], + anchors=[], + pre_nms_topn=1)) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/image_ops/extract_image_patches_op_test.py b/tensorflow/python/kernel_tests/image_ops/extract_image_patches_op_test.py index 9d9b7bf7248d41..3247fbb428adba 100644 --- a/tensorflow/python/kernel_tests/image_ops/extract_image_patches_op_test.py +++ b/tensorflow/python/kernel_tests/image_ops/extract_image_patches_op_test.py @@ -17,7 +17,9 @@ import numpy as np from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -139,6 +141,17 @@ def testComplexDataTypes(self): padding=padding, patches=patches) + def testInvalidAttributes(self): + """Test for passing weird things into ksizes.""" + with self.assertRaisesRegex(TypeError, "Expected list"): + image = constant_op.constant([0.0]) + ksizes = math_ops.cast( + constant_op.constant(dtype=dtypes.int16, value=[[1, 4], [5, 2]]), + dtype=dtypes.qint16) + strides = [1, 1, 1, 1] + self.evaluate( + array_ops.extract_image_patches( + image, ksizes=ksizes, strides=strides, padding="SAME")) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/linalg/BUILD b/tensorflow/python/kernel_tests/linalg/BUILD index 8d423a9f95f204..02252640551f66 100644 --- a/tensorflow/python/kernel_tests/linalg/BUILD +++ b/tensorflow/python/kernel_tests/linalg/BUILD @@ -623,6 +623,7 @@ cuda_py_test( srcs = ["matrix_solve_ls_op_test.py"], tags = [ "noasan", # TODO(b/337374867) fails with -fsanitize=null + "no_oss", ], deps = [ "//tensorflow/python:array_ops", diff --git a/tensorflow/python/kernel_tests/linalg/eig_op_test.py b/tensorflow/python/kernel_tests/linalg/eig_op_test.py index 37ba5b0cc89592..33ce04f50d3baa 100644 --- a/tensorflow/python/kernel_tests/linalg/eig_op_test.py +++ b/tensorflow/python/kernel_tests/linalg/eig_op_test.py @@ -18,8 +18,10 @@ from 
tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_lib +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_linalg_ops from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops @@ -88,6 +90,16 @@ def testMatrixThatFailsWhenFlushingDenormsToZero(self): self.assertAllClose(matrix, np.matmul(np.matmul(v, np.diag(e)), v.transpose())) + def testMismatchedDtypes(self): + tensor = constant_op.constant([[0, 1], [2, 3]], dtype=dtypes_lib.float32) + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Invalid output dtype"): + self.evaluate( + gen_linalg_ops.eig( + input=tensor, + Tout=dtypes_lib.complex128, # Expected dtype: complex64. + compute_v=True)) + def SortEigenValues(e): perm = np.argsort(e.real + e.imag, -1) diff --git a/tensorflow/python/kernel_tests/linalg/sparse/csr_sparse_matrix_ops_test.py b/tensorflow/python/kernel_tests/linalg/sparse/csr_sparse_matrix_ops_test.py index 035791ce0a5bf5..d129bea768e85f 100644 --- a/tensorflow/python/kernel_tests/linalg/sparse/csr_sparse_matrix_ops_test.py +++ b/tensorflow/python/kernel_tests/linalg/sparse/csr_sparse_matrix_ops_test.py @@ -1313,6 +1313,16 @@ def testOrderingAMD(self): self.assertLess(cholesky_with_amd_nnz_value, cholesky_without_ordering_nnz_value) + @test_util.run_in_graph_and_eager_modes + def testNoMatrixNoCrash(self): + # Round-about way of creating an empty variant tensor that works in both + # graph and eager modes. + no_matrix = array_ops.reshape(dense_to_csr_sparse_matrix([[0.0]]), [1])[0:0] + with self.assertRaisesRegex( + (ValueError, errors.InvalidArgumentError), + "(Invalid input matrix)|(Shape must be rank 0)"): + sparse_csr_matrix_ops.sparse_matrix_nnz(no_matrix) + class CSRSparseMatrixOpsBenchmark(test.Benchmark): diff --git a/tensorflow/python/kernel_tests/linalg/svd_op_test.py b/tensorflow/python/kernel_tests/linalg/svd_op_test.py index 7a4182b8105688..8ceca06beff0a6 100644 --- a/tensorflow/python/kernel_tests/linalg/svd_op_test.py +++ b/tensorflow/python/kernel_tests/linalg/svd_op_test.py @@ -108,6 +108,14 @@ def testExecuteMultipleWithoutError(self): for i in range(0, len(val), 2): self.assertAllEqual(val[i], val[i + 1]) + @test_util.run_in_graph_and_eager_modes(use_gpu=True) + def testEmptyBatches(self): + matrices = constant_op.constant(1.0, shape=[0, 2, 2]) + s, u, v = self.evaluate(linalg_ops.svd(matrices)) + self.assertAllEqual(s, np.zeros([0, 2])) + self.assertAllEqual(u, np.zeros([0, 2, 2])) + self.assertAllEqual(v, np.zeros([0, 2, 2])) + def _GetSvdOpTest(dtype_, shape_, use_static_shape_, compute_uv_, full_matrices_): diff --git a/tensorflow/python/kernel_tests/math_ops/bincount_op_test.py b/tensorflow/python/kernel_tests/math_ops/bincount_op_test.py index 9161b3b082270c..ad0c0469717f5f 100644 --- a/tensorflow/python/kernel_tests/math_ops/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/math_ops/bincount_op_test.py @@ -24,6 +24,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import bincount_ops from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops.ragged import ragged_factory_ops from tensorflow.python.ops.ragged import ragged_tensor @@ -150,6 +151,31 @@ def 
test_shape_function(self): v2 = gen_math_ops.bincount([1, 2, 3, 1, 6, 8], s, []) self.assertAllEqual(v2.get_shape().as_list(), [None]) + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + binary_output = True + inp = random_ops.random_uniform( + shape=[10, 10], + minval=-10000, + maxval=10000, + dtype=dtypes.int32, + seed=-2460) + size = random_ops.random_uniform( + shape=[], minval=-10000, maxval=10000, dtype=dtypes.int32, seed=-10000) + weights = random_ops.random_uniform( + shape=[], + minval=-10000, + maxval=10000, + dtype=dtypes.float32, + seed=-10000) + with self.assertRaises(errors.InvalidArgumentError): + self.evaluate( + gen_math_ops.dense_bincount( + input=inp, + size=size, + weights=weights, + binary_output=binary_output)) + class BincountOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): @@ -366,7 +392,7 @@ def test_sparse_bincount_all_count(self, dtype): num_rows = 128 size = 1000 n_elems = 4096 - inp_indices = np.random.randint(0, num_rows, (n_elems,)) + inp_indices = np.random.randint(0, num_rows, (n_elems, 1)) inp_vals = np.random.randint(0, size, (n_elems,), dtype=dtype) np_out = np.bincount(inp_vals, minlength=size) @@ -390,7 +416,7 @@ def test_sparse_bincount_all_count_with_weights(self, dtype): num_rows = 128 size = 1000 n_elems = 4096 - inp_indices = np.random.randint(0, num_rows, (n_elems,)) + inp_indices = np.random.randint(0, num_rows, (n_elems, 1)) inp_vals = np.random.randint(0, size, (n_elems,), dtype=dtype) inp_weight = np.random.random((n_elems,)) @@ -415,7 +441,7 @@ def test_sparse_bincount_all_binary(self, dtype): num_rows = 128 size = 10 n_elems = 4096 - inp_indices = np.random.randint(0, num_rows, (n_elems,)) + inp_indices = np.random.randint(0, num_rows, (n_elems, 1)) inp_vals = np.random.randint(0, size, (n_elems,), dtype=dtype) np_out = np.ones((size,)) @@ -440,7 +466,7 @@ def test_sparse_bincount_all_binary_weights(self, dtype): num_rows = 128 size = 10 n_elems = 4096 - inp_indices = np.random.randint(0, num_rows, (n_elems,)) + inp_indices = np.random.randint(0, num_rows, (n_elems, 1)) inp_vals = np.random.randint(0, size, (n_elems,), dtype=dtype) inp_weight = np.random.random((n_elems,)) @@ -532,6 +558,27 @@ def test_size_is_not_scalar(self): # b/206619828 weights=[0, 0], binary_output=False)) + def test_sparse_bincount_input_validation(self): + np.random.seed(42) + num_rows = 128 + size = 1000 + n_elems = 4096 + inp_indices = np.random.randint(0, num_rows, (n_elems, 1)) + inp_vals = np.random.randint(0, size, (n_elems,)) + + # Insert negative index. 
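A brief aside on the indices shapes used in these tests (a sketch of the SparseTensor layout, not part of the patch): for a rank-1 sparse tensor, indices is a [nnz, 1] matrix with one row per stored value, which is why the test data above switched from shape (n_elems,) to (n_elems, 1).

import numpy as np
indices = np.array([[0], [2], [5]], dtype=np.int64)  # shape [3, 1]: rank-1 sparse tensor
values = np.array([7, 1, 2], dtype=np.int64)         # one value per index row
dense_shape = [8]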
+ inp_indices[10, 0] = -2 + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "out of bounds"): + self.evaluate( + gen_math_ops.sparse_bincount( + indices=inp_indices, + values=inp_vals, + dense_shape=[num_rows], + size=size, + weights=[])) + class RaggedBincountOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): @@ -684,6 +731,18 @@ def test_size_is_not_scalar(self): # b/206619828 binary_output=False, name=None)) + @test_util.run_in_graph_and_eager_modes + def test_splits_empty(self): # b/238450914 + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Splits must be non-empty"): + self.evaluate( + gen_math_ops.ragged_bincount( + splits=[], # Invalid splits + values=[1], + size=1, + weights=[1], + binary_output=False, + name=None)) if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/kernel_tests/math_ops/sets_test.py b/tensorflow/python/kernel_tests/math_ops/sets_test.py index 61a11606e661ed..a35214173d59b8 100644 --- a/tensorflow/python/kernel_tests/math_ops/sets_test.py +++ b/tensorflow/python/kernel_tests/math_ops/sets_test.py @@ -23,6 +23,7 @@ from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_set_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import sets from tensorflow.python.ops import sparse_ops @@ -1303,6 +1304,18 @@ def test_set_union_output_is_sorted(self, dtype): result.values, _constant([1, 3, 5, 7, 9, 0, 2, 4, 5, 6, 6, 8, 9], dtype)) + def test_raw_ops_setsize_invalid_shape(self): + with self.assertRaisesRegex(errors_impl.InvalidArgumentError, + "Shape must be a 1D tensor"): + invalid_shape = 1 + self.evaluate( + gen_set_ops.set_size( + set_indices=1, + set_values=[1, 1], + set_shape=invalid_shape, + validate_indices=True, + name="")) + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/kernel_tests/nn_ops/BUILD b/tensorflow/python/kernel_tests/nn_ops/BUILD index 242334d9b05f70..1b803e4f3af907 100644 --- a/tensorflow/python/kernel_tests/nn_ops/BUILD +++ b/tensorflow/python/kernel_tests/nn_ops/BUILD @@ -149,6 +149,7 @@ cuda_py_test( shard_count = 2, tags = [ "optonly", # flaky timeouts unless optimized + "no_oss", # TODO(b/258503209): Disable the test. ], deps = [ "//tensorflow/python:array_ops", @@ -483,6 +484,7 @@ cuda_py_test( srcs = ["pooling_ops_3d_test.py"], deps = [ "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:nn_grad", "//tensorflow/python:nn_ops", @@ -637,7 +639,10 @@ cuda_py_test( name = "xent_op_d9m_test", size = "medium", srcs = ["xent_op_d9m_test.py"], - tags = ["notsan"], # TODO(b/200548634): Remove. + tags = [ + "notsan", # TODO(b/200548634): Remove. + "no_oss", # TODO(b/258503209): Disable the test. 
+ ], xla_enable_strict_auto_jit = False, deps = [ ":xent_op_test_base", diff --git a/tensorflow/python/kernel_tests/nn_ops/conv_ops_test.py b/tensorflow/python/kernel_tests/nn_ops/conv_ops_test.py index 91a35f9002b1f1..265d1462793492 100644 --- a/tensorflow/python/kernel_tests/nn_ops/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/conv_ops_test.py @@ -32,6 +32,7 @@ from tensorflow.python.layers import convolutional from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops @@ -759,6 +760,15 @@ def testConv2DExplicitPaddingWithDilations(self): padding=[[2, 1], [1, 2]], dilations=[2, 3]) + @test_util.run_in_graph_and_eager_modes() + def testConv2dOnlyPaddingReturnsZeros(self): + self._VerifyValues( + tensor_in_sizes=[1, 0, 2, 1], + filter_in_sizes=[1, 1, 1, 1], + strides=[1, 1], + padding=[[1, 1], [1, 1]], + expected=[0, 0, 0, 0, 0, 0, 0, 0]) + def testConv2DExplicitPaddingWithLayoutOptimizer(self): # Test with Grappler's layout optimizer, to ensure the layout optimizer # handles explicit padding correctly. @@ -1103,6 +1113,23 @@ def testConv2DInputSizesContainsOnlySpatialDimensionsBackpropInput(self): use_gpu=use_gpu, err=1e-5) + @test_util.run_in_graph_and_eager_modes + @test_util.disable_xla("b/239598470") + def testConv2DBackpropInputDegenerateBackpropInput(self): + input_sizes = [3, 1, 1, 2] + expected_output = np.zeros(input_sizes).flatten() + for (data_format, use_gpu) in GetTestConfigs(): + self._RunAndVerifyBackpropInput( + input_sizes=input_sizes, + filter_sizes=[1, 3, 2, 3], + output_sizes=[3, 1, 0, 3], + strides=[1, 2], + padding="VALID", + expected=expected_output, + data_format=data_format, + use_gpu=use_gpu, + err=1e-5) + # Testing for backprops def _RunAndVerifyBackpropFilter(self, input_sizes, @@ -1293,7 +1320,7 @@ def _RunAndVerifyBackpropInputDilation(self, input_sizes, filter_sizes, x2 = self._CreateNumpyTensor(filter_sizes) default_dilations = (dilations[0] == 1 and dilations[1] == 1) if default_dilations or use_gpu: - with self.cached_session(use_gpu=use_gpu) as sess: + with self.cached_session(use_gpu=use_gpu): if data_format == "NCHW": input_sizes = test_util.NHWCToNCHW(input_sizes) t1 = constant_op.constant(x1, shape=input_sizes) @@ -1339,7 +1366,7 @@ def _RunAndVerifyBackpropFilterDilation(self, input_sizes, filter_sizes, x2 = self._CreateNumpyTensor(filter_sizes) default_dilations = (dilations[0] == 1 and dilations[1] == 1) if default_dilations or use_gpu: - with self.cached_session(use_gpu=use_gpu) as sess: + with self.cached_session(use_gpu=use_gpu): if data_format == "NCHW": input_sizes = test_util.NHWCToNCHW(input_sizes) t1 = constant_op.constant(x1, shape=input_sizes) @@ -2602,6 +2629,27 @@ def testOpEdgeCases(self): strides=[1, 1, 1, 1], padding=[[0, 0], [-1, 0], [0, 0], [0, 0]])) + def testConv2DBackpropInputInvalidOutBackpropRaiseError(self): + with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): + with self.cached_session(): + input_sizes = constant_op.constant([65534, 65534], + shape=[2], + dtype=dtypes.int32) + filters = constant_op.constant( + 0.159749106, shape=[3, 3, 2, 2], dtype=dtypes.float32) + out_backprop = constant_op.constant(0, shape=[], dtype=dtypes.float32) + t = gen_nn_ops.conv2d_backprop_input( + input_sizes=input_sizes, + filter=filters, + out_backprop=out_backprop, + strides=[1, 1, 1, 
1], + padding="SAME", + use_cudnn_on_gpu=True, + explicit_paddings=[], + data_format="NHWC", + dilations=[1, 1, 1, 1]) + self.evaluate(t) + @test_util.run_all_without_tensor_float_32("Avoid TF32 conv on GPU") class DepthwiseConv2DTest(test.TestCase): @@ -2629,7 +2677,7 @@ def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, stride, padding, # numbers from 1. x1 = [f * 1.0 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 for f in range(1, total_size_2 + 1)] - with self.cached_session() as sess: + with self.cached_session(): t1 = constant_op.constant(x1, shape=tensor_in_sizes) t1.set_shape(tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes) @@ -2900,7 +2948,7 @@ def _CompareFwdConv2D(self, tensor_in_sizes, filter_in_sizes, conv_strides, x1 = np.random.rand(*tensor_in_sizes).astype(np.float32) x2 = np.random.rand(*filter_in_sizes).astype(np.float32) - with self.cached_session(use_gpu=False) as sess: + with self.cached_session(use_gpu=False): t1 = constant_op.constant(x1, shape=tensor_in_sizes) t2 = constant_op.constant(x2, shape=filter_in_sizes) strides = [1] + conv_strides + [1] @@ -3383,6 +3431,33 @@ def testAddWithSameSrcAndAddTensorBuffer(self): np.rint(expected_output), self.evaluate(add).reshape(-1)) + # Fused resize and pad conv. + @test_util.run_in_graph_and_eager_modes() + def testResizeAndPadLargeResize(self): + with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError), + "Encountered overflow"): + mode = "REFLECT" + strides = [1, 1, 1, 1] + padding = "SAME" + resize_align_corners = False + tensor = constant_op.constant( + 147, shape=[3, 3, 1, 4], dtype=dtypes.float32) + size = constant_op.constant([1879048192, 1879048192], dtype=dtypes.int32) + paddings = constant_op.constant([[0, 0], [0, 0], [0, 0], [0, 0]], + dtype=dtypes.int32) + kernel = constant_op.constant( + 123, shape=[1, 3, 4, 1], dtype=dtypes.float32) + self.evaluate( + gen_nn_ops.fused_resize_and_pad_conv2d( + input=tensor, + size=size, + paddings=paddings, + filter=kernel, + mode=mode, + strides=strides, + padding=padding, + resize_align_corners=resize_align_corners)) + if __name__ == "__main__": for index, (input_size_, filter_size_, output_size_, stride_, diff --git a/tensorflow/python/kernel_tests/nn_ops/fractional_avg_pool_op_test.py b/tensorflow/python/kernel_tests/nn_ops/fractional_avg_pool_op_test.py index 7b153ae1ed7084..59b20de84b4c4e 100644 --- a/tensorflow/python/kernel_tests/nn_ops/fractional_avg_pool_op_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/fractional_avg_pool_op_test.py @@ -333,6 +333,41 @@ def testNegativeSeqValuesForGradOp(self): self.evaluate(z) + def testPoolingRatioHasMoreDimThanInput(self): + with self.cached_session() as _: + with self.assertRaisesRegex( + errors.InvalidArgumentError, + r"Pooling ratio is higher than input dimension size for dimension 1.*" + ): + result = nn_ops.gen_nn_ops.fractional_avg_pool( + value=constant_op.constant( + value=[[[[1, 4, 2, 3]]]], dtype=dtypes.int64), + pooling_ratio=[1.0, 1.44, 1.73, 1.0], + pseudo_random=False, + overlapping=False, + deterministic=False, + seed=0, + seed2=0, + name=None) + self.evaluate(result) + + def testPoolingRatioValueOutOfRange(self): + with self.cached_session() as _: + # Whether turn on `TF2_BEHAVIOR` generates different error messages + with self.assertRaisesRegex( + (errors.InvalidArgumentError, ValueError), + r"(pooling_ratio cannot be smaller than 1, got: .*)|(is negative)"): + result = nn_ops.gen_nn_ops.fractional_avg_pool( + value=np.zeros([3, 30, 30, 3]), + pooling_ratio=[1, 
-1, 3, 1], + pseudo_random=False, + overlapping=False, + deterministic=False, + seed=0, + seed2=0, + ) + self.evaluate(result) + class FractionalAvgPoolGradTest(test.TestCase): """Tests for FractionalAvgPoolGrad. @@ -541,6 +576,27 @@ def testLargePoolingRatioThroughGradientError(self): delta=1e-2) self.assertLess(gradient_error, error_margin) + def testInvalidSeqRaiseErrorForFractionalAvgPoolGrad(self): + with self.assertRaises((errors.InvalidArgumentError, ValueError)): + with self.cached_session() as _: + overlapping = True + orig_input_tensor_shape = constant_op.constant( + -1879048192, shape=[4], dtype=dtypes.int64) + out_backprop = constant_op.constant([], + shape=[0, 0, 0, 0], + dtype=dtypes.float64) + row_pooling_sequence = constant_op.constant( + 1, shape=[4], dtype=dtypes.int64) + col_pooling_sequence = constant_op.constant( + 1, shape=[4], dtype=dtypes.int64) + t = gen_nn_ops.fractional_avg_pool_grad( + orig_input_tensor_shape=orig_input_tensor_shape, + out_backprop=out_backprop, + row_pooling_sequence=row_pooling_sequence, + col_pooling_sequence=col_pooling_sequence, + overlapping=overlapping) + self.evaluate(t) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/nn_ops/fractional_max_pool_op_test.py b/tensorflow/python/kernel_tests/nn_ops/fractional_max_pool_op_test.py index 5acacdbb7463b2..9102973fa13b50 100644 --- a/tensorflow/python/kernel_tests/nn_ops/fractional_max_pool_op_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/fractional_max_pool_op_test.py @@ -124,7 +124,7 @@ def _ValidateFractionalMaxPoolResult(self, input_tensor, pooling_ratio, Returns: None """ - with self.cached_session() as sess: + with self.cached_session(): p, r, c = nn_ops.fractional_max_pool_v2( input_tensor, pooling_ratio, @@ -155,7 +155,7 @@ def _testVisually(self): overlapping)) rand_mat = self._PRNG.randint(10, size=tensor_shape) pooling_ratio = [1, math.sqrt(2), math.sqrt(2), 1] - with self.cached_session() as sess: + with self.cached_session(): p, r, c = nn_ops.fractional_max_pool_v2( rand_mat, pooling_ratio, @@ -320,7 +320,7 @@ def testDeterminismExceptionThrowing(self): nn_ops.fractional_max_pool( rand_mat, [1, 1.5, 1.5, 1], seed=1, seed2=1, deterministic=True) - def testPoolingRatio(self): + def testPoolingRatioHasMoreDimThanInput(self): with self.cached_session() as _: with self.assertRaisesRegex( errors.InvalidArgumentError, @@ -338,6 +338,23 @@ def testPoolingRatio(self): name=None) self.evaluate(result) + def testPoolingRatioValueOutOfRange(self): + with self.cached_session() as _: + # Whether turn on `TF2_BEHAVIOR` generates different error messages + with self.assertRaisesRegex( + (errors.InvalidArgumentError, ValueError), + r"(pooling_ratio cannot be smaller than 1, got: .*)|(is negative)"): + result = nn_ops.gen_nn_ops.fractional_max_pool( + value=np.zeros([3, 30, 30, 3]), + pooling_ratio=[1, -1, 3, 1], + pseudo_random=False, + overlapping=False, + deterministic=False, + seed=0, + seed2=0, + ) + self.evaluate(result) + class FractionalMaxPoolGradTest(test.TestCase): """Tests for FractionalMaxPoolGrad. 
@@ -630,6 +647,47 @@ def testWhenRepeatedMaxValueInPoolingRegion(self): self.assertAllClose(expected_input_backprop_overlapping, input_backprop_overlapping) + def testInvalidSeqRaiseErrorForFractionalMaxPoolGrad(self): + with self.assertRaises(errors.InvalidArgumentError): + with self.cached_session(): + overlapping = True + orig_input = constant_op.constant( + .453409232, shape=[1, 7, 13, 1], dtype=dtypes.float32) + orig_output = constant_op.constant( + .453409232, shape=[1, 7, 13, 1], dtype=dtypes.float32) + out_backprop = constant_op.constant( + .453409232, shape=[1, 7, 13, 1], dtype=dtypes.float32) + row_pooling_sequence = constant_op.constant( + 0, shape=[5], dtype=dtypes.int64) + col_pooling_sequence = constant_op.constant( + 0, shape=[5], dtype=dtypes.int64) + t = gen_nn_ops.FractionalMaxPoolGrad( + orig_input=orig_input, + orig_output=orig_output, + out_backprop=out_backprop, + row_pooling_sequence=row_pooling_sequence, + col_pooling_sequence=col_pooling_sequence, + overlapping=overlapping) + self.evaluate(t) + + def testOverLargeSeqRaiseErrorForFractionalMaxPoolGrad(self): + with self.assertRaises(errors.InvalidArgumentError): + with self.cached_session(): + overlapping = False + orig_input = [[[[1, 1, 1, 1, 1]]]] + orig_output = [[[[1, 1, 1]]]] + out_backprop = [[[[3], [3], [6]]]] + row_pooling_sequence = [-0x4000000, 1, 1] + col_pooling_sequence = [-0x4000000, 1, 1] + t = gen_nn_ops.FractionalMaxPoolGrad( + orig_input=orig_input, + orig_output=orig_output, + out_backprop=out_backprop, + row_pooling_sequence=row_pooling_sequence, + col_pooling_sequence=col_pooling_sequence, + overlapping=overlapping) + self.evaluate(t) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/nn_ops/lrn_op_test.py b/tensorflow/python/kernel_tests/nn_ops/lrn_op_test.py index 9fb7724f695375..f44c7316845b21 100644 --- a/tensorflow/python/kernel_tests/nn_ops/lrn_op_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/lrn_op_test.py @@ -20,11 +20,13 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import nn +from tensorflow.python.ops import random_ops import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test @@ -111,6 +113,41 @@ def testGradientsZeroInput(self): self.assertAllClose(r, expected) self.assertShapeEqual(expected, grad) + @test_util.run_in_graph_and_eager_modes + def testIncompatibleInputAndOutputImageShapes(self): + depth_radius = 1 + bias = 1.59018219 + alpha = 0.117728651 + beta = 0.404427052 + input_grads = random_ops.random_uniform( + shape=[4, 4, 4, 4], + minval=-10000, + maxval=10000, + dtype=dtypes.float32, + seed=-2033) + input_image = random_ops.random_uniform( + shape=[4, 4, 4, 4], + minval=-10000, + maxval=10000, + dtype=dtypes.float32, + seed=-2033) + invalid_output_image = random_ops.random_uniform( + shape=[4, 4, 4, 4, 4, 4], + minval=-10000, + maxval=10000, + dtype=dtypes.float32, + seed=-2033) + with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): + self.evaluate( + nn.lrn_grad( + input_grads=input_grads, + input_image=input_image, + output_image=invalid_output_image, + depth_radius=depth_radius, + bias=bias, + alpha=alpha, + beta=beta)) + def 
_RunAndVerifyGradients(self, dtype): with self.cached_session(): # random shape diff --git a/tensorflow/python/kernel_tests/nn_ops/pooling_ops_3d_test.py b/tensorflow/python/kernel_tests/nn_ops/pooling_ops_3d_test.py index 71bf5d02bcb568..9c8f11eb743096 100644 --- a/tensorflow/python/kernel_tests/nn_ops/pooling_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/pooling_ops_3d_test.py @@ -18,6 +18,7 @@ from tensorflow.python.eager import context from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util @@ -67,7 +68,7 @@ def _VerifyOneTest(self, pool_func, input_sizes, window, strides, padding, # Initializes the input tensor with array containing incrementing # numbers from 1. x = [f * 1.0 for f in range(1, total_size + 1)] - with self.cached_session(use_gpu=use_gpu) as sess: + with self.cached_session(use_gpu=use_gpu): t = constant_op.constant(x, shape=input_sizes) window = [1] + list(window) + [1] strides = [1] + list(strides) + [1] @@ -124,6 +125,23 @@ def testAvgPool3dSamePaddingDifferentStrides(self): padding="SAME", expected=expected_output) + def testMaxPool3dGrad(self): + with self.assertRaises( + (errors.ResourceExhaustedError, errors.InvalidArgumentError)): + with self.cached_session(): + orig_input_shape = constant_op.constant( + 1879048192, shape=[5], dtype=dtypes.int32) + grad = constant_op.constant( + 1, shape=[1, 3, 2, 4, 2], dtype=dtypes.float32) + t = gen_nn_ops.AvgPool3DGrad( + orig_input_shape=orig_input_shape, + grad=grad, + ksize=[1, 1, 1, 1, 1], + strides=[1, 1, 1, 1, 1], + padding="SAME", + data_format="NDHWC") + self.evaluate(t) + def testMaxPool3dValidPadding(self): expected_output = [40.0, 41.0, 42.0] self._VerifyValues( diff --git a/tensorflow/python/kernel_tests/nn_ops/pooling_ops_test.py b/tensorflow/python/kernel_tests/nn_ops/pooling_ops_test.py index aca29d05a3686d..e279f6e0027da2 100644 --- a/tensorflow/python/kernel_tests/nn_ops/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/pooling_ops_test.py @@ -537,6 +537,18 @@ def testAvgPoolEmptyInput(self, **kwargs): expected=[], **kwargs) + @test_util.run_in_graph_and_eager_modes + def testRawAvgPoolLargeKsizeRaiseError(self): + with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): + with self.cached_session(): + t = gen_nn_ops.avg_pool( + value=np.ones([1, 1, 1, 1]), + ksize=[1, 1e20, 1, 1], + strides=[1, 1, 1, 1], + padding="SAME", + data_format="NHWC") + self.evaluate(t) + @parameterized.parameters( GetTestConfigsDicts(nn_ops.max_pool, gen_nn_ops.max_pool_v2)) @test_util.run_deprecated_v1 @@ -760,6 +772,18 @@ def testMaxPoolEmptyInput(self, **kwargs): expected=[], **kwargs) + @parameterized.parameters( + GetTestConfigsDicts(nn_ops.max_pool, gen_nn_ops.max_pool_v2)) + @test_util.run_deprecated_v1 + def testMaxPoolInvalidFilterSize(self, **kwargs): + with self.cached_session(use_gpu=test.is_gpu_available()): + t = constant_op.constant(1.0, shape=[1, 1, 1, 1]) + with self.assertRaisesRegex( + (errors_impl.InvalidArgumentError, ValueError), + "Negative dimension size"): + t = self.evaluate( + nn_ops.max_pool(t, ksize=[1, 1, 2, 1], strides=1, padding="VALID")) + # Tests for DepthwiseMaxPooling on CPU only. 
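Many of the new checks in this file assert on a tuple of exception types because the same invalid call surfaces differently by execution mode: graph-mode shape inference raises ValueError, while the eager kernel raises InvalidArgumentError. A minimal sketch of the recurring pattern, where invalid_call is a hypothetical stand-in for any of the ops exercised above:

# invalid_call() is hypothetical: any op invocation whose arguments fail validation
with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)):
  self.evaluate(invalid_call())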
@parameterized.parameters( GetTestConfigsDicts( @@ -2470,6 +2494,22 @@ def testMaxPoolGradWithArgmaxEagerShapeErrors(self): inp, grad, argmax, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], padding="VALID") + def testAvgPoolGradInvalidInputShapeRaiseError(self): + with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)): + with self.cached_session(): + orig_input_shape = constant_op.constant( + -536870912, shape=[4], dtype=dtypes.int32) + grad = constant_op.constant( + .0890338004362538, shape=[1, 5, 7, 1], dtype=dtypes.float64) + t = gen_nn_ops.AvgPoolGrad( + orig_input_shape=orig_input_shape, + grad=grad, + ksize=[1, 2, 2, 1], + strides=[1, 2, 2, 1], + padding="VALID", + data_format="NHWC") + self.evaluate(t) + def GetMaxPoolFwdTest(input_size, filter_size, strides, padding): diff --git a/tensorflow/python/kernel_tests/nn_ops/rnn_cell_test.py b/tensorflow/python/kernel_tests/nn_ops/rnn_cell_test.py index bed3cbfd8aa2a9..15a398114124a2 100644 --- a/tensorflow/python/kernel_tests/nn_ops/rnn_cell_test.py +++ b/tensorflow/python/kernel_tests/nn_ops/rnn_cell_test.py @@ -1354,6 +1354,58 @@ def testLSTMBlockCellErrorHandling(self): cell_clip=cell_clip, use_peephole=use_peephole)) + @test_util.run_in_graph_and_eager_modes + def testLSTMBlockCellGradErrorHandling(self): + use_peephole = False + seq_len_max = constant_op.constant(1, shape=[], dtype=dtypes.int64) + x = constant_op.constant(0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + cs_prev = constant_op.constant( + 0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + h_prev = constant_op.constant( + 0.504355371, shape=[1, 1], dtype=dtypes.float32) + w = constant_op.constant(0.504355371, shape=[1, 1], dtype=dtypes.float32) + wci = constant_op.constant(0.504355371, shape=[1], dtype=dtypes.float32) + wcf = constant_op.constant(0.504355371, shape=[1], dtype=dtypes.float32) + wco = constant_op.constant(0.504355371, shape=[1], dtype=dtypes.float32) + b = constant_op.constant(0.504355371, shape=[1], dtype=dtypes.float32) + i = constant_op.constant(0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + cs = constant_op.constant( + 0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + f = constant_op.constant(0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + o = constant_op.constant(0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + ci = constant_op.constant( + 0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + co = constant_op.constant( + 0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + h = constant_op.constant(0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + cs_grad = constant_op.constant( + 0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + h_grad = constant_op.constant( + 0.504355371, shape=[1, 1, 1], dtype=dtypes.float32) + with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError), + "must be rank"): + self.evaluate( + gen_rnn_ops.block_lstm_grad_v2( + seq_len_max=seq_len_max, + x=x, + cs_prev=cs_prev, + h_prev=h_prev, + w=w, + wci=wci, + wcf=wcf, + wco=wco, + b=b, + i=i, + cs=cs, + f=f, + o=o, + ci=ci, + co=co, + h=h, + cs_grad=cs_grad, + h_grad=h_grad, + use_peephole=use_peephole)) + class BidirectionalRNNTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/quantization_ops/BUILD b/tensorflow/python/kernel_tests/quantization_ops/BUILD new file mode 100644 index 00000000000000..ff0be9898c601d --- /dev/null +++ b/tensorflow/python/kernel_tests/quantization_ops/BUILD @@ -0,0 +1,24 @@ +# Tests of TensorFlow quantization ops written using the Python API. 
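Assuming a standard Bazel checkout, the target defined in this new BUILD file can be run on its own; the label follows from the package path and target name below, though the exact flags depend on the local configuration:

bazel test //tensorflow/python/kernel_tests/quantization_ops:quantization_ops_test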
+ +# buildifier: disable=same-origin-load +load("//tensorflow:tensorflow.bzl", "tf_py_test") + +package( + default_visibility = ["//tensorflow:internal"], + licenses = ["notice"], +) + +tf_py_test( + name = "quantization_ops_test", + size = "small", + srcs = ["quantization_ops_test.py"], + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//third_party/py/numpy", + ], +) diff --git a/tensorflow/python/kernel_tests/quantization_ops/quantization_ops_test.py b/tensorflow/python/kernel_tests/quantization_ops/quantization_ops_test.py new file mode 100644 index 00000000000000..7f23b69c2a1e71 --- /dev/null +++ b/tensorflow/python/kernel_tests/quantization_ops/quantization_ops_test.py @@ -0,0 +1,478 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tf.quantize ops.""" +import numpy as np + +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.platform import googletest + + +class FakeQuantWithMinMaxVarsOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars( + inputs=inputs, min=0.0, max=[[1.0], [2.0], [4.0]])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars( + inputs=inputs, min=[[1.0], [2.0], [4.0]], max=1.0)) + + +class FakeQuantWithMinMaxVarsPerChannelOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 1"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel( + inputs=inputs, min=[[0.0]], max=[1.0])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Dimensions must be equal|incorrect size"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel( + inputs=inputs, min=[0.0, 0.1], max=[1.0])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must 
be rank 1"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel( + inputs=inputs, min=[1.0], max=[[1.0]])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Dimensions must be equal|incorrect size"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel( + inputs=inputs, min=[0.0], max=[1.0, 1.1])) + + +class FakeQuantWithMinMaxVarsGradientOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + gradients = constant_op.constant( + value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32) + inputs = constant_op.constant( + value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be equal rank|must be rank 0"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_gradient( + gradients=gradients, + inputs=inputs, + min=0.0, + max=[[1.0], [2.0], [4.0]])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_gradient( + gradients=gradients, + inputs=inputs, + min=[[1.0], [2.0], [4.0]], + max=[[1.0], [2.0], [4.0]])) + + +class FakeQuantWithMinMaxVarsPerChannelGradientOpTest( + test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + gradients = constant_op.constant( + value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32) + inputs = constant_op.constant( + value=[[1.0], [2.0], [4.0]], dtype=dtypes.float32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Shapes must be equal rank|must be rank 1"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel_gradient( + gradients=gradients, inputs=inputs, min=[[0.0]], max=[1.0])) + + with self.assertRaisesRegex( + (ValueError, errors.InvalidArgumentError), + "Dimension 0 in both shapes must be equal|incorrect size"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel_gradient( + gradients=gradients, inputs=inputs, min=[0.0, 0.1], max=[1.0])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "Shapes must be equal rank|must be rank 1"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel_gradient( + gradients=gradients, inputs=inputs, min=[1.0], max=[[1.0]])) + + with self.assertRaisesRegex( + (ValueError, errors.InvalidArgumentError), + "Dimension 0 in both shapes must be equal|incorrect size"): + self.evaluate( + array_ops.fake_quant_with_min_max_vars_per_channel_gradient( + gradients=gradients, inputs=inputs, min=[0.0], max=[1.0, 1.1])) + + +class QuantizedBiasedAddTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.int8(0), shape=[3, 3, 3, 3], dtype=dtypes.qint8) + bias = constant_op.constant(np.int8(0), shape=[3], dtype=dtypes.qint8) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + nn_ops.quantized_bias_add( + input=inputs, + bias=bias, + min_input=[], + max_input=1.0, + min_bias=0.0, + max_bias=1.0, + out_type=dtypes.qint32)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + nn_ops.quantized_bias_add( + input=inputs, + bias=bias, + min_input=0.0, + max_input=[], + min_bias=0.0, + max_bias=1.0, + out_type=dtypes.qint32)) + + with self.assertRaisesRegex((ValueError, 
errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + nn_ops.quantized_bias_add( + input=inputs, + bias=bias, + min_input=0.0, + max_input=1.0, + min_bias=[], + max_bias=1.0, + out_type=dtypes.qint32)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + nn_ops.quantized_bias_add( + input=inputs, + bias=bias, + min_input=0.0, + max_input=1.0, + min_bias=0.0, + max_bias=[], + out_type=dtypes.qint32)) + + +class QuantizedInstanceNormOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.uint8(0), shape=[3, 3, 3, 3], dtype=dtypes.quint8) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + array_ops.quantized_instance_norm( + x=inputs, x_min=0.0, x_max=[[1.0], [2.0], [4.0]])) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + array_ops.quantized_instance_norm( + x=inputs, x_min=[[1.0], [2.0], [4.0]], x_max=1.0)) + + +class QuantizedAvgPoolingOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.uint8(0), shape=[3, 3, 3, 3], dtype=dtypes.quint8) + ksize = [1, 1, 1, 1] + strides = [1, 1, 1, 1] + padding = "SAME" + + with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError), + "must be.* rank 0"): + self.evaluate( + nn_ops.quantized_avg_pool( + input=inputs, + min_input=[], + max_input=1.0, + ksize=ksize, + strides=strides, + padding=padding)) + + with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError), + "must be.* rank 0"): + self.evaluate( + nn_ops.quantized_avg_pool( + input=inputs, + min_input=0.0, + max_input=[], + ksize=ksize, + strides=strides, + padding=padding)) + + +class QuantizedMaxPoolingOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.uint8(0), shape=[3, 3, 3, 3], dtype=dtypes.quint8) + ksize = [1, 1, 1, 1] + strides = [1, 1, 1, 1] + padding = "SAME" + + with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError), + "must be.* rank 0"): + self.evaluate( + nn_ops.quantized_max_pool( + input=inputs, + min_input=[], + max_input=1.0, + ksize=ksize, + strides=strides, + padding=padding)) + + with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError), + "must be.* rank 0"): + self.evaluate( + nn_ops.quantized_max_pool( + input=inputs, + min_input=0.0, + max_input=[], + ksize=ksize, + strides=strides, + padding=padding)) + + +class RequantizeOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.int32(0), shape=[3, 3, 3, 3], dtype=dtypes.qint32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + math_ops.requantize( + input=inputs, + input_min=[], + input_max=1.0, + requested_output_min=0.0, + requested_output_max=1.0, + out_type=dtypes.qint8)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + math_ops.requantize( + input=inputs, + input_min=0.0, + input_max=[], + requested_output_min=0.0, + requested_output_max=1.0, + out_type=dtypes.qint8)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must 
be rank 0"): + self.evaluate( + math_ops.requantize( + input=inputs, + input_min=0.0, + input_max=1.0, + requested_output_min=[], + requested_output_max=1.0, + out_type=dtypes.qint8)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + math_ops.requantize( + input=inputs, + input_min=0.0, + input_max=1.0, + requested_output_min=0.0, + requested_output_max=[], + out_type=dtypes.qint8)) + + +class QuantizedAddOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + x = constant_op.constant( + np.int8(0), shape=[3, 3, 3, 3], dtype=dtypes.quint8) + y = constant_op.constant(np.int8(0), shape=[3], dtype=dtypes.quint8) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + math_ops.quantized_add( + x=x, + y=y, + min_x=[], + max_x=1.0, + min_y=0.0, + max_y=1.0, + Toutput=dtypes.qint32)) + + +class QuantizedReluOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.int8(0), shape=[3, 3, 3, 3], dtype=dtypes.quint8) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + nn_ops.quantized_relu( + features=inputs, + min_features=[], + max_features=127.0, + out_type=dtypes.quint8)) + + +class QuantizedRelu6OpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.int8(0), shape=[3, 3, 3, 3], dtype=dtypes.quint8) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + nn_ops.quantized_relu6( + features=inputs, + min_features=[], + max_features=127.0, + out_type=dtypes.quint8)) + + +class QuantizeAndDequantizeV3OpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_valid(self): + with ops.Graph().as_default(), context.eager_mode(): + input_value = constant_op.constant([-0.8, -0.5, 0, 0.3, 0.8, -2.0], + shape=(6,), + dtype=dtypes.float32), + input_min = constant_op.constant(-127, shape=(), dtype=dtypes.float32) + input_max = constant_op.constant(127, shape=(), dtype=dtypes.float32) + num_bits = constant_op.constant(8, shape=(), dtype=dtypes.int32) + + quantized = array_ops.quantize_and_dequantize_v3( + input_value, + input_min, + input_max, + num_bits, + signed_input=True, + range_given=False) + self.assertSequenceAlmostEqual( + input_value[0].numpy(), quantized.numpy()[0], delta=0.05) + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.int32(0), shape=[3, 3, 3, 3], dtype=dtypes.qint32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + math_ops.quantize_down_and_shrink_range( + input=inputs, input_min=[], input_max=4.0, + out_type=dtypes.quint8)) + + input_value = constant_op.constant([-0.8, -0.5, 0, 0.3, 0.8, -2.0], + shape=(6,), + dtype=dtypes.float32), + input_min = constant_op.constant(-127, shape=(), dtype=dtypes.float32) + input_max = constant_op.constant(127, shape=(), dtype=dtypes.float32) + # Tensor with invalid shape and invalid number of elements. + num_bits = constant_op.constant([], shape=(0,), dtype=dtypes.int32) + + # Test that running the op raises error. 
It raises different errors + # depending on whether the shape inference is run first or the op's + # Compute() is run first. + try: + array_ops.quantize_and_dequantize_v3( + input_value, input_min, input_max, num_bits, signed_input=True) + except Exception as ex: # pylint: disable=broad-except + if isinstance(ex, errors.InvalidArgumentError): + self.assertRegex(str(ex), "The `num_bits` tensor should be a scalar.") + elif isinstance(ex, ValueError): + self.assertRegex(str(ex), "Shape must be rank 0") + else: + self.fail( + "Raised exception other than expected: %s. " + "Expected exceptions are errors.InvalidArgumentError or ValueError", + ex.__name__) + else: + self.fail( + "Did not raise an exception where it is expected to raise either " + "a ValueError or errors.InvalidArgumentError.") + + +class QuantizeDownAndShrinkRangeOpTest(test_util.TensorFlowTestCase): + + @test_util.run_in_graph_and_eager_modes + def test_invalid_inputs(self): + inputs = constant_op.constant( + np.int32(0), shape=[3, 3, 3, 3], dtype=dtypes.qint32) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "must be rank 0"): + self.evaluate( + math_ops.quantize_down_and_shrink_range(input=inputs, + input_min=[], + input_max=4.0, + out_type=dtypes.quint8)) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/kernel_tests/random/candidate_sampler_ops_test.py b/tensorflow/python/kernel_tests/random/candidate_sampler_ops_test.py index b70a30f46062c5..396843ace3ae0d 100644 --- a/tensorflow/python/kernel_tests/random/candidate_sampler_ops_test.py +++ b/tensorflow/python/kernel_tests/random/candidate_sampler_ops_test.py @@ -18,6 +18,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import candidate_sampling_ops @@ -127,6 +128,27 @@ def draw(seed): # twice very rarely. self.assertLessEqual(num_same, 2) + def testCandidateOutOfRange(self): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "out of range"): + self.evaluate( + candidate_sampling_ops.log_uniform_candidate_sampler( + true_classes=[[0, 10]], + num_true=2, + num_sampled=1000, + unique=False, + range_max=2)) + + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "out of range"): + self.evaluate( + candidate_sampling_ops.log_uniform_candidate_sampler( + true_classes=[[0, -10]], + num_true=2, + num_sampled=1000, + unique=False, + range_max=2)) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/random/parameterized_truncated_normal_op_test.py b/tensorflow/python/kernel_tests/random/parameterized_truncated_normal_op_test.py index a1c6072abfda49..8ad859230edcc6 100644 --- a/tensorflow/python/kernel_tests/random/parameterized_truncated_normal_op_test.py +++ b/tensorflow/python/kernel_tests/random/parameterized_truncated_normal_op_test.py @@ -303,6 +303,29 @@ def testSamplingWithSmallStdDevFarFromBound(self): self.assertAllGreater(samples, 0.) self.assertAllGreater(samples_stateless, 0.) + def testShapeTypes(self): + for shape_dtype in [np.int32, np.int64]: + shape = np.array([1000], dtype=shape_dtype) + sample_op = random_ops.parameterized_truncated_normal( + shape=shape, means=0.0, stddevs=0.1, minvals=-1., maxvals=1.) 
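+      # The stateless sampler below requires an explicit two-element int32
+      # seed, so one is drawn next with random_uniform.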
+ new_seed = random_ops.random_uniform([2], + seed=1234, + minval=0, + maxval=(2**31 - 1), + dtype=np.int32) + sample_op_stateless = stateless.stateless_parameterized_truncated_normal( + shape=shape, + seed=new_seed, + means=0.0, + stddevs=0.1, + minvals=-1., + maxvals=1.) + + samples = self.evaluate(sample_op) + stateless_samples = self.evaluate(sample_op_stateless) + self.assertAllEqual(samples.shape, shape) + self.assertAllEqual(stateless_samples.shape, shape) + def testStatelessParameterizedTruncatedNormalHasGrads(self): mean = variables.Variable(0.01) stddev = variables.Variable(1.) diff --git a/tensorflow/python/kernel_tests/random/random_gamma_test.py b/tensorflow/python/kernel_tests/random/random_gamma_test.py index 71b06da183f332..4d3ea4fe5284e6 100644 --- a/tensorflow/python/kernel_tests/random/random_gamma_test.py +++ b/tensorflow/python/kernel_tests/random/random_gamma_test.py @@ -16,7 +16,10 @@ import numpy as np +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import test_util @@ -216,6 +219,16 @@ def testPositive(self): self.assertEqual(0, math_ops.reduce_sum(math_ops.cast( math_ops.less_equal(x, 0.), dtype=dtypes.int64)).eval()) + def testSizeTooLarge(self): + # Grappler asserts on size overflow, so this error is only caught when + # running eagerly. + if context.executing_eagerly(): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "overflow"): + rate = constant_op.constant(1.0, shape=(4, 4, 4, 4, 4)) + self.evaluate( + random_ops.random_gamma( + shape=[46902, 51188, 34063, 59195], alpha=rate)) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/random/random_poisson_test.py b/tensorflow/python/kernel_tests/random/random_poisson_test.py index 9f21f91ed5728f..c0470e6029aa99 100644 --- a/tensorflow/python/kernel_tests/random/random_poisson_test.py +++ b/tensorflow/python/kernel_tests/random/random_poisson_test.py @@ -17,6 +17,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.kernel_tests.random import util @@ -171,6 +172,14 @@ def testInfRate(self): sample = random_ops.random_poisson(shape=[2], lam=np.inf) self.assertAllEqual([np.inf, np.inf], self.evaluate(sample)) + def testSizeTooLarge(self): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + "overflow"): + rate = constant_op.constant(1.0, shape=(4, 4, 4, 4, 4)) + self.evaluate( + random_ops.random_poisson( + shape=[46902, 51188, 34063, 59195], lam=rate)) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/sparse_ops/BUILD b/tensorflow/python/kernel_tests/sparse_ops/BUILD index d4f2210a8de8fa..72ec0c34645b98 100644 --- a/tensorflow/python/kernel_tests/sparse_ops/BUILD +++ b/tensorflow/python/kernel_tests/sparse_ops/BUILD @@ -263,7 +263,10 @@ cuda_py_test( name = "sparse_xent_op_d9m_test", size = "medium", srcs = ["sparse_xent_op_d9m_test.py"], - tags = ["no_windows"], # Fails as SegmentSum is nondeterministic on Windows + tags = [ + "no_windows", # Fails as SegmentSum is nondeterministic on Windows + "no_oss", # 
TODO(b/258503209): Disable the test. + ], xla_enable_strict_auto_jit = False, deps = [ ":sparse_xent_op_test_base", diff --git a/tensorflow/python/kernel_tests/sparse_ops/sparse_cross_op_test.py b/tensorflow/python/kernel_tests/sparse_ops/sparse_cross_op_test.py index 28a133d982feaa..94f170454d7b5f 100644 --- a/tensorflow/python/kernel_tests/sparse_ops/sparse_cross_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_ops/sparse_cross_op_test.py @@ -873,6 +873,14 @@ def test_all_columns_empty(self): with self.cached_session(): self._assert_sparse_tensor_empty(self.evaluate(out)) + def testNonScalarInput(self): + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'Input separator should be a scalar.'): + self.evaluate(sparse_ops.sparse_cross( + inputs=[], + name='a', + separator=constant_op.constant(['a', 'b'], dtype=dtypes.string))) + class SparseCrossHashedOpTest(BaseSparseCrossOpTest): diff --git a/tensorflow/python/kernel_tests/sparse_ops/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops/sparse_ops_test.py index 684d1f98432b53..c1ceac68040318 100644 --- a/tensorflow/python/kernel_tests/sparse_ops/sparse_ops_test.py +++ b/tensorflow/python/kernel_tests/sparse_ops/sparse_ops_test.py @@ -514,6 +514,13 @@ def testFillNumber(self): self.assertAllEqual(empty_row_indicator_out, np.array([0, 0, 1, 0, 1]).astype(np.bool_)) + def testSparseFillEmptyRowsGradEmpty(self): + with test_util.use_gpu(): + grad, _ = self.evaluate( + sparse_ops.sparse_fill_empty_rows_grad( + reverse_index_map=[], grad_values=[])) + self.assertAllEqual(grad, []) + @test_util.run_deprecated_v1 def testFillFloat(self): with self.session(): diff --git a/tensorflow/python/lib/core/BUILD b/tensorflow/python/lib/core/BUILD index 9f30831235eec1..d297b9a9a2d6f5 100644 --- a/tensorflow/python/lib/core/BUILD +++ b/tensorflow/python/lib/core/BUILD @@ -82,6 +82,7 @@ cc_library( deps = [ ":bfloat16_lib", ":numpy_lib", + ":py_util", "//tensorflow/c:c_api_no_xla", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/python/lib/core/ndarray_tensor_bridge.cc b/tensorflow/python/lib/core/ndarray_tensor_bridge.cc index 1c1bf40de426b9..6fd213cd83d329 100644 --- a/tensorflow/python/lib/core/ndarray_tensor_bridge.cc +++ b/tensorflow/python/lib/core/ndarray_tensor_bridge.cc @@ -13,8 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +// clang-format off // Must be included first. #include "tensorflow/python/lib/core/numpy.h" +// clang-format on + +#include "tensorflow/python/lib/core/ndarray_tensor_bridge.h" #include @@ -22,7 +26,7 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/python/lib/core/bfloat16.h" -#include "tensorflow/python/lib/core/ndarray_tensor_bridge.h" +#include "tensorflow/python/lib/core/py_util.h" namespace tensorflow { @@ -206,8 +210,28 @@ Status ArrayFromMemory(int dim_size, npy_intp* dims, void* data, DataType dtype, return s; } + if (dim_size > NPY_MAXDIMS) { + return errors::InvalidArgument( + "Cannot convert tensor with ", dim_size, + " dimensions to NumPy array. 
NumPy arrays can have at most ", + NPY_MAXDIMS, " dimensions"); + } auto* np_array = reinterpret_cast( PyArray_SimpleNewFromData(dim_size, dims, type_num, data)); + if (np_array == nullptr) { + string shape_str = absl::StrJoin( + absl::Span{dims, static_cast(dim_size)}, ", "); + if (PyErr_Occurred()) { + string exception_str = PyExceptionFetch(); + PyErr_Clear(); + return errors::InvalidArgument( + "Failed to create numpy array from tensor of shape [", shape_str, + "]. Numpy error: ", exception_str); + } + return errors::Internal( + "Failed to create numpy array from tensor of shape [", shape_str, "]"); + } + PyArray_CLEARFLAGS(np_array, NPY_ARRAY_OWNDATA); if (PyType_Ready(&TensorReleaserType) == -1) { return errors::Unknown("Python type initialization failed."); diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index 96f4bab383adbb..429f29b0bd5884 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -83,8 +83,8 @@ bool IsCPUDevice(const Device* d) { return d == nullptr || d->tensorflow_accelerator_device_info() == nullptr; } -// Givens the 'call', prepares the token and inputs as a python tuple -// that is appropriate for calling the trampoline. +// Given the 'call', prepares the token and inputs as a python tuple that is +// appropriate for calling the trampoline. Status MakeArgTuple(const PyCall* call, TFE_Context* ctx, PyObject** tuple) { int64_t n = call->ins.size(); PyObject* lst = PyList_New(n); @@ -119,7 +119,11 @@ Status MakeArgTuple(const PyCall* call, TFE_Context* ctx, PyObject** tuple) { PyList_SetItem(lst, i, arg); } *tuple = Py_BuildValue("(ssN)", call->token.c_str(), device_name, lst); - CHECK(*tuple); + if (*tuple == nullptr) { + return errors::Internal( + "Failed to create python tuple. 
Please make sure `token` is a " + "well-formed UTF-8 string."); + } return Status::OK(); } diff --git a/tensorflow/python/ops/batch_ops_test.py b/tensorflow/python/ops/batch_ops_test.py index 7ef5e06fb6757d..15a1a71a93bb63 100644 --- a/tensorflow/python/ops/batch_ops_test.py +++ b/tensorflow/python/ops/batch_ops_test.py @@ -20,7 +20,9 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.framework import test_util @@ -30,6 +32,7 @@ from tensorflow.python.ops import gen_batch_ops from tensorflow.python.ops import gen_functional_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import script_ops from tensorflow.python.ops import variables @@ -233,6 +236,26 @@ def worker(): self.assertEqual(thread_results[0], [2]) self.assertEqual(main_results[0], [3]) + def testUnbatchInvalidIdArg(self): + """Tests that unbatch work together.""" + if context.executing_eagerly(): + batched_tensor = constant_op.constant( + value=np.random.random(size=(3, 3, 1)), dtype=dtypes.float64) + batched_index = constant_op.constant( + value=np.random.randint(0, 100, size=(3, 3, 1)), dtype=dtypes.int64) + arg_id = constant_op.constant( + value=np.random.randint(0, 100, size=(3, 3, 1)), dtype=dtypes.int64) + + with self.assertRaisesRegex(errors.InvalidArgumentError, + "Input id should be scalar;"): + batch_ops.unbatch( + batched_tensor=batched_tensor, + batch_index=batched_index, + id=arg_id, + timeout_micros=50, + container="", + shared_name="") + def testBatchDecoratedWithCapturedInput(self): """Tests that the batch_function decorator works.""" if context.executing_eagerly(): @@ -557,6 +580,56 @@ def worker(): # The thread's call should hit the timeout, and thus get 0 results. 
self.assertEqual(len(thread_results), 0) + def testUnbatchGradInvalidId(self): + with self.assertRaises(errors.InvalidArgumentError): + self.evaluate( + gen_batch_ops.unbatch_grad( + original_input=constant_op.constant([1]), + batch_index=constant_op.constant([ + [0, 0, 0], + ], dtype=dtypes.int64), + grad=constant_op.constant([ + 1, + ]), + id=constant_op.constant([ + 1, + 1, + ], dtype=dtypes.int64))) + + def testUnbatchGradInvalidBatchId(self): + with self.assertRaises(errors.InvalidArgumentError): + self.evaluate( + gen_batch_ops.unbatch_grad( + original_input=constant_op.constant([1]), + batch_index=constant_op.constant([ + [0, 0], + ], dtype=dtypes.int64), + grad=constant_op.constant([ + 1, + ]), + id=constant_op.constant([ + 1, + ], dtype=dtypes.int64))) + + def testUnbatchGradInvalidArgs(self): + original_input = random_ops.random_uniform( + shape=(3, 1), dtype=dtypes.float64, maxval=None) + batch_index = random_ops.random_uniform( + shape=(3, 1), dtype=dtypes.int64, maxval=65536) + grad = random_ops.random_uniform( + shape=(3, 1), dtype=dtypes.float64, maxval=None) + batch_id = random_ops.random_uniform( + shape=(3, 1), dtype=dtypes.int64, maxval=65536) + with self.assertRaises(errors.InvalidArgumentError): + self.evaluate( + gen_batch_ops.unbatch_grad( + original_input=original_input, + batch_index=batch_index, + grad=grad, + id=batch_id, + container="", + shared_name="", + name="")) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/collective_ops_test.py b/tensorflow/python/ops/collective_ops_test.py index a95896c0a23a07..5b57956996b8ac 100644 --- a/tensorflow/python/ops/collective_ops_test.py +++ b/tensorflow/python/ops/collective_ops_test.py @@ -451,6 +451,20 @@ def testCollectiveGroupSizeMismatch(self): ]) context.ensure_initialized() + @test_util.run_v2_only + def testCollectiveGatherShapeCheckFailure(self): + with self.assertRaisesRegex(errors.InvalidArgumentError, + 'input should have rank > 0'): + collective_ops.gen_collective_ops.CollectiveGather( + input=1, + group_size=1, + group_key=1, + instance_key=1, + shape=(3, 3, 3), + communication_hint='auto', + timeout_seconds=0, + name='') + @def_function.function def run_all_reduce(): group_key = 10 diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index c5359a232a5a2d..c80058e06b3832 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -2335,6 +2335,29 @@ def testInvalidInput(self): self.evaluate(v) +class ImageProjectiveTransformV2(test_util.TensorFlowTestCase): + + def testShapeTooLarge(self): + interpolation = "BILINEAR" + fill_mode = "REFLECT" + images = constant_op.constant( + 0.184634328, shape=[2, 5, 8, 3], dtype=dtypes.float32) + transforms = constant_op.constant( + 0.378575385, shape=[2, 8], dtype=dtypes.float32) + output_shape = constant_op.constant([1879048192, 1879048192], + shape=[2], + dtype=dtypes.int32) + with self.assertRaisesRegex(errors.InvalidArgumentError, + r"Encountered overflow when multiplying"): + self.evaluate( + gen_image_ops.ImageProjectiveTransformV2( + images=images, + transforms=transforms, + output_shape=output_shape, + interpolation=interpolation, + fill_mode=fill_mode)) + + class InternalPadToBoundingBoxTest(test_util.TensorFlowTestCase, parameterized.TestCase): @@ -4152,6 +4175,25 @@ def testPad(self): self._assertReturns(x, x_shape, y, y_shape) +class ResizeNearestNeighborGrad(test_util.TensorFlowTestCase): + + def testSizeTooLarge(self): + align_corners = True + 
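+    # A 1879048192 x 1879048192 output size overflows when the kernel
+    # multiplies the output dimensions, so the op is expected to fail.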
half_pixel_centers = False + grads = constant_op.constant(1, shape=[1, 8, 16, 3], dtype=dtypes.float16) + size = constant_op.constant([1879048192, 1879048192], + shape=[2], + dtype=dtypes.int32) + with self.assertRaisesRegex(errors.InvalidArgumentError, + r"Encountered overflow when multiplying"): + self.evaluate( + gen_image_ops.ResizeNearestNeighborGrad( + grads=grads, + size=size, + align_corners=align_corners, + half_pixel_centers=half_pixel_centers)) + + class ResizeImageWithCropOrPadTest(test_util.TensorFlowTestCase): def _ResizeImageWithCropOrPad(self, x, target_height, target_width, diff --git a/tensorflow/python/ops/ragged/ragged_range_op_test.py b/tensorflow/python/ops/ragged/ragged_range_op_test.py index 8465fb25997b8b..aaf48bf9783785 100644 --- a/tensorflow/python/ops/ragged/ragged_range_op_test.py +++ b/tensorflow/python/ops/ragged/ragged_range_op_test.py @@ -84,8 +84,7 @@ def testBroadcast(self): list(range(5, 15, 3))]) # Broadcast all arguments. - self.assertAllEqual( - ragged_math_ops.range(0, 5, 1), [list(range(0, 5, 1))]) + self.assertAllEqual(ragged_math_ops.range(0, 5, 1), [list(range(0, 5, 1))]) def testEmptyRanges(self): rt1 = ragged_math_ops.range([0, 5, 3], [0, 3, 5]) @@ -108,6 +107,10 @@ def testKernelErrors(self): r'Requires delta != 0'): self.evaluate(ragged_math_ops.range(0, 0, 0)) + with self.assertRaisesRegex(errors.InvalidArgumentError, + r'Requires \(\(limit - start\) / delta\) <='): + self.evaluate(ragged_math_ops.range(0.1, 1e10, 1e-10)) + def testShape(self): self.assertAllEqual( ragged_math_ops.range(0, 0, 1).shape.as_list(), [1, None]) diff --git a/tensorflow/python/ops/ragged/ragged_tensor_test.py b/tensorflow/python/ops/ragged/ragged_tensor_test.py index a801a342b204ae..32ddfaac8e4a5d 100644 --- a/tensorflow/python/ops/ragged/ragged_tensor_test.py +++ b/tensorflow/python/ops/ragged/ragged_tensor_test.py @@ -1465,6 +1465,21 @@ def testUnbatchVariantInDataset(self): for i in range(3): self.assertAllEqual(sess.run(rt[i]), out) + def testToVariantInvalidParams(self): + self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r'be rank 1 but is rank 0', + gen_ragged_conversion_ops.ragged_tensor_to_variant, + rt_nested_splits=[0, 1, 2], + rt_dense_values=[0, 1, 2], + batched_input=True) + + self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r'be rank 1 but is rank 2', + gen_ragged_conversion_ops.ragged_tensor_to_variant, + rt_nested_splits=[[[0]], [[1]], [[2]]], + rt_dense_values=[0, 1, 2], + batched_input=True) + def testFromVariantInvalidParams(self): rt = ragged_factory_ops.constant([[0], [1], [2], [3]]) batched_variant = rt._to_variant(batched_input=True) diff --git a/tensorflow/python/ops/script_ops_test.py b/tensorflow/python/ops/script_ops_test.py index e9168741d9f55b..93109da05a0227 100644 --- a/tensorflow/python/ops/script_ops_test.py +++ b/tensorflow/python/ops/script_ops_test.py @@ -16,8 +16,11 @@ from tensorflow.python.eager import def_function from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.framework import constant_op +from tensorflow.python.ops import gen_script_ops +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import script_ops from tensorflow.python.ops.script_ops import numpy_function from tensorflow.python.platform import test @@ -87,5 +90,30 @@ def func_stateful(a, b): 2) # as stateful, func is guaranteed to execute twice +class 
PyFunctionTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes + def test_variable_arguments(self): + + def plus(a, b): + return a + b + + v1 = resource_variable_ops.ResourceVariable(1) + self.evaluate(v1.initializer) + + actual_result = script_ops.eager_py_func(plus, [v1, 2], dtypes.int32) + expect_result = constant_op.constant(3, dtypes.int32) + self.assertAllEqual(actual_result, expect_result) + + @test_util.run_in_graph_and_eager_modes + def test_fail_on_non_utf8_token(self): + value = constant_op.constant(value=[1, 2]) + token = b"\xb0" + data_type = [dtypes.int32] + with self.assertRaises((errors.InternalError, UnicodeDecodeError)): + self.evaluate( + gen_script_ops.py_func(input=[value], token=token, Tout=data_type)) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/sobol_ops_test.py b/tensorflow/python/ops/sobol_ops_test.py index f026e4434fae7c..24abf790704ae0 100644 --- a/tensorflow/python/ops/sobol_ops_test.py +++ b/tensorflow/python/ops/sobol_ops_test.py @@ -16,9 +16,12 @@ import numpy as np from tensorflow.python.eager import def_function +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest @@ -126,5 +129,15 @@ def test_default_dtype(self): s = math_ops.sobol_sample(10, 100) self.assertEqual(dtypes.float32, s.dtype) + @test_util.run_in_graph_and_eager_modes + def test_non_scalar_input(self): + with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError), + r'Shape must be rank 0 but is rank 1|' + r'\w+ must be a scalar'): + self.evaluate(gen_math_ops.sobol_sample( + dim=7, + num_results=constant_op.constant([1, 0]), + skip=constant_op.constant([1]))) + if __name__ == '__main__': googletest.main() diff --git a/tensorflow/python/platform/BUILD b/tensorflow/python/platform/BUILD index 5e77b619185988..bc8231eceacda5 100644 --- a/tensorflow/python/platform/BUILD +++ b/tensorflow/python/platform/BUILD @@ -159,6 +159,7 @@ tf_py_test( tags = [ "no_windows", "nomac", + "no_oss", # TODO(b/258503209): Disable the test. 
], deps = [ ":client_testlib", diff --git a/tensorflow/python/summary/summary_test.py b/tensorflow/python/summary/summary_test.py index dd37a1e7a4da53..0329ba1c784f43 100644 --- a/tensorflow/python/summary/summary_test.py +++ b/tensorflow/python/summary/summary_test.py @@ -23,6 +23,7 @@ from tensorflow.core.framework import summary_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops from tensorflow.python.framework import test_util @@ -183,6 +184,11 @@ def testAudioSummaryWithFamily(self): 'family/outer/family/inner/audio/{}'.format(i) for i in range(3)) self.assertEqual(tags, expected) + def testAudioSummaryWithInvalidSampleRate(self): + with self.assertRaises(errors.InvalidArgumentError): + invalid_sample_rate = [22000.0, 22000.0] + self.evaluate(summary_lib.audio('', [[1.0]], invalid_sample_rate)) + @test_util.run_deprecated_v1 def testTextSummary(self): with self.cached_session(): diff --git a/tensorflow/python/summary/writer/writer_test.py b/tensorflow/python/summary/writer/writer_test.py index 80c1faf0c5e800..2a3a5c7c2fc8d6 100644 --- a/tensorflow/python/summary/writer/writer_test.py +++ b/tensorflow/python/summary/writer/writer_test.py @@ -229,7 +229,7 @@ def testNeitherGraphNorGraphDef(self): sw.close() @test_util.run_deprecated_v1 - def testCloseAndReopen(self): + def disabled_testCloseAndReopen(self): test_dir = self._CleanTestDir("close_and_reopen") sw = self._FileWriter(test_dir) sw.add_session_log(event_pb2.SessionLog(status=SessionLog.START), 1) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 143bc0294da0ce..d2e501a0293297 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -56,7 +56,7 @@ def register_extension_info(**kwargs): # not contain rc or alpha, only numbers. 
# Also update tensorflow/core/public/version.h # and tensorflow/tools/pip_package/setup.py -VERSION = "2.9.0" +VERSION = "2.9.3" VERSION_MAJOR = VERSION.split(".")[0] two_gpu_tags = ["requires-gpu-nvidia:2", "notap", "manual", "no_pip"] diff --git a/tensorflow/tools/ci_build/Dockerfile.cpu.arm64 b/tensorflow/tools/ci_build/Dockerfile.cpu.arm64 new file mode 100644 index 00000000000000..8cdda10a8c3a0b --- /dev/null +++ b/tensorflow/tools/ci_build/Dockerfile.cpu.arm64 @@ -0,0 +1,38 @@ +FROM quay.io/pypa/manylinux2014_aarch64 + +RUN yum -y check-update || true && \ + yum install -y \ + sudo \ + wget \ + openssl-devel \ + libffi-devel \ + java-1.8.0-openjdk-devel \ + bzip2-devel \ + gdbm-devel \ + ncurses-devel \ + nss-devel \ + readline-devel \ + sqlite-devel && \ + yum clean all + +COPY install/install_bazel.sh /install/ +RUN /install/install_bazel.sh + +ARG py_major_minor_version + +ENV TF_PYTHON_VERSION=python${py_major_minor_version} +ENV PYTHON_BIN_PATH=/usr/local/bin/${TF_PYTHON_VERSION} + +RUN ln -s ${PYTHON_BIN_PATH} /usr/local/bin/python && \ + ln -s ${PYTHON_BIN_PATH} /usr/local/bin/python3 + +RUN curl -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py && \ + python /tmp/get-pip.py && \ + rm -f /tmp/get-pip.py + +RUN export PYTHON_VERSION=$(python -c 'import platform; print(platform.python_version())') && \ + ln -s /opt/_internal/cpython-$PYTHON_VERSION/bin/pip3 /usr/local/bin/pip${py_major_minor_version} && \ + ln -s /opt/_internal/cpython-$PYTHON_VERSION/bin/pip3 /usr/local/bin/pip3 && \ + ln -s /opt/_internal/cpython-$PYTHON_VERSION/bin/pip /usr/local/bin/pip + +RUN pip3 install packaging diff --git a/tensorflow/tools/ci_build/builds/pip_new.sh b/tensorflow/tools/ci_build/builds/pip_new.sh index b630d1a5d7d6a6..d503b6f95fd60b 100755 --- a/tensorflow/tools/ci_build/builds/pip_new.sh +++ b/tensorflow/tools/ci_build/builds/pip_new.sh @@ -762,12 +762,10 @@ if [[ ${OS_TYPE} == "ubuntu" ]] && \ for WHL_PATH in $(ls ${PIP_WHL_DIR}/*.whl); do # Repair the wheels for cpu manylinux2010/manylinux2014 echo "auditwheel repairing ${WHL_PATH}" - auditwheel repair --plat ${AUDITWHEEL_TARGET_PLAT}_x86_64 -w "${WHL_DIR}" "${WHL_PATH}" + auditwheel repair --plat ${AUDITWHEEL_TARGET_PLAT}_$(uname -m) -w "${WHL_DIR}" "${WHL_PATH}" - WHL_BASE_NAME=$(basename "${WHL_PATH}") - AUDITED_WHL_NAME="${WHL_DIR}"/$(echo "${WHL_BASE_NAME//linux/${AUDITWHEEL_TARGET_PLAT}}") - if [[ -f ${AUDITED_WHL_NAME} ]]; then - WHL_PATH=${AUDITED_WHL_NAME} + if [[ $(ls ${WHL_DIR} | grep ${AUDITWHEEL_TARGET_PLAT} | wc -l) == 1 ]] ; then + WHL_PATH=${WHL_DIR}/$(ls ${WHL_DIR} | grep ${AUDITWHEEL_TARGET_PLAT}) echo "Repaired ${AUDITWHEEL_TARGET_PLAT} wheel file at: ${WHL_PATH}" else die "WARNING: Cannot find repaired wheel." diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user index 0c8c5069936ba4..b09134d5ad13a7 100755 --- a/tensorflow/tools/ci_build/builds/with_the_same_user +++ b/tensorflow/tools/ci_build/builds/with_the_same_user @@ -27,39 +27,45 @@ set -e COMMAND=("$@") -if ! touch /this_is_writable_file_system; then - echo "You can't write to your filesystem!" - echo "If you are in Docker you should check you do not have too many images" \ - "with too many files in them. Docker has some issue with it." - exit 1 +if [[ $(awk -F= '/^NAME/{print $2}' /etc/os-release) == *"CentOS"* ]]; then + ${COMMAND[@]} else - rm /this_is_writable_file_system -fi -if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then - ADDUSER_OPTS="--force-badname" -fi + if ! 
touch /this_is_writable_file_system; then + echo "You can't write to your filesystem!" + echo "If you are in Docker you should check you do not have too many images" \ + "with too many files in them. Docker has some issue with it." + exit 1 + else + rm /this_is_writable_file_system + fi -apt-get install sudo + if [ -n "${CI_BUILD_USER_FORCE_BADNAME}" ]; then + ADDUSER_OPTS="--force-badname" + fi -getent group "${CI_BUILD_GID}" || addgroup ${ADDUSER_OPTS} --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" -getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \ - --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \ - --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \ - --disabled-password --home "${CI_BUILD_HOME}" --quiet "${CI_BUILD_USER}" -usermod -a -G sudo "${CI_BUILD_USER}" -echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo + apt-get install sudo -if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then - # ROCm requires the video group in order to use the GPU for compute. If it - # exists on the host, add it to the container. - getent group video || addgroup video && adduser "${CI_BUILD_USER}" video -fi + getent group "${CI_BUILD_GID}" || addgroup ${ADDUSER_OPTS} --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" + getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \ + --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \ + --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \ + --disabled-password --home "${CI_BUILD_HOME}" --quiet "${CI_BUILD_USER}" + usermod -a -G sudo "${CI_BUILD_USER}" + echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo -if [ -e /root/.bazelrc ]; then - cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc" - chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc" -fi + if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then + # ROCm requires the video group in order to use the GPU for compute. If it + # exists on the host, add it to the container. + getent group video || addgroup video && adduser "${CI_BUILD_USER}" video + fi -sudo -u "#${CI_BUILD_UID}" --preserve-env "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" \ -"HOME=${CI_BUILD_HOME}" ${COMMAND[@]} + if [ -e /root/.bazelrc ]; then + cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc" + chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc" + fi + + sudo -u "#${CI_BUILD_UID}" --preserve-env "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" \ + "HOME=${CI_BUILD_HOME}" ${COMMAND[@]} + +fi diff --git a/tensorflow/tools/ci_build/install/install_bazel.sh b/tensorflow/tools/ci_build/install/install_bazel.sh index c8a605c322d2bf..a41b158d2c72db 100755 --- a/tensorflow/tools/ci_build/install/install_bazel.sh +++ b/tensorflow/tools/ci_build/install/install_bazel.sh @@ -29,12 +29,17 @@ set -e # Install bazel. mkdir -p /bazel cd /bazel -if [[ ! -f "bazel-$BAZEL_VERSION-installer-linux-x86_64.sh" ]]; then - curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh +if [[ $(uname -m) == "aarch64" ]]; then + curl -o /usr/local/bin/bazel -fSsL https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-linux-arm64 + chmod +x /usr/local/bin/bazel +else + if [[ ! 
-f "bazel-$BAZEL_VERSION-installer-linux-x86_64.sh" ]]; then + curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh + fi + chmod +x /bazel/bazel-*.sh + /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh + rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh fi -chmod +x /bazel/bazel-*.sh -/bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh -rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh # Enable bazel auto completion. echo "source /usr/local/lib/bazel/bin/bazel-complete.bash" >> ~/.bashrc diff --git a/tensorflow/tools/ci_build/rel/ubuntu/cpu_arm64_pip.sh b/tensorflow/tools/ci_build/rel/ubuntu/cpu_arm64_pip.sh new file mode 100644 index 00000000000000..a745c89e74070f --- /dev/null +++ b/tensorflow/tools/ci_build/rel/ubuntu/cpu_arm64_pip.sh @@ -0,0 +1,95 @@ +#!/bin/bash +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e +set -x + +source tensorflow/tools/ci_build/release/common.sh + +# Update bazel +install_bazelisk + +# Env vars used to avoid interactive elements of the build. +export HOST_C_COMPILER=(which gcc) +export HOST_CXX_COMPILER=(which g++) +export TF_ENABLE_XLA=1 +export TF_DOWNLOAD_CLANG=0 +export TF_SET_ANDROID_WORKSPACE=0 +export TF_NEED_MPI=0 +export TF_NEED_ROCM=0 +export TF_NEED_GCP=0 +export TF_NEED_S3=0 +export TF_NEED_OPENCL_SYCL=0 +export TF_NEED_CUDA=0 +export TF_NEED_HDFS=0 +export TF_NEED_OPENCL=0 +export TF_NEED_JEMALLOC=1 +export TF_NEED_VERBS=0 +export TF_NEED_AWS=0 +export TF_NEED_GDR=0 +export TF_NEED_OPENCL_SYCL=0 +export TF_NEED_COMPUTECPP=0 +export TF_NEED_KAFKA=0 +export TF_NEED_TENSORRT=0 + +# Export required variables for running pip_new.sh +export OS_TYPE="UBUNTU" +export CONTAINER_TYPE="CPU" + +# Get the default test targets for bazel. +source tensorflow/tools/ci_build/build_scripts/DEFAULT_TEST_TARGETS.sh + +# Set python version string +py_ver=$(python -c 'import sys; print(str(sys.version_info.major)+str(sys.version_info.minor))') + +# Export optional variables for running pip_new.sh +export TF_BUILD_FLAGS="--config=mkl_aarch64 --copt=-mtune=generic --copt=-march=armv8-a \ + --copt=-O3 --copt=-fopenmp --copt=-flax-vector-conversions --linkopt=-lgomp" +export TF_TEST_FLAGS="${TF_BUILD_FLAGS} \ + --test_env=TF_ENABLE_ONEDNN_OPTS=1 --test_env=TF2_BEHAVIOR=1 --test_lang_filters=py \ + --define=no_tensorflow_py_deps=true --verbose_failures=true --test_keep_going" +export TF_TEST_TARGETS="${DEFAULT_BAZEL_TARGETS} \ + -//tensorflow/lite/... 
\ + -//tensorflow/compiler/mlir/lite/tests:const-fold.mlir.test \ + -//tensorflow/compiler/mlir/lite/tests:prepare-tf.mlir.test \ + -//tensorflow/python:nn_grad_test \ + -//tensorflow/python/eager:forwardprop_test \ + -//tensorflow/python/framework:node_file_writer_test \ + -//tensorflow/python/grappler:memory_optimizer_test \ + -//tensorflow/python/keras/engine:training_arrays_test \ + -//tensorflow/python/keras/layers:convolutional_recurrent_test \ + -//tensorflow/python/kernel_tests/linalg:linear_operator_householder_test \ + -//tensorflow/python/kernel_tests/linalg:linear_operator_inversion_test \ + -//tensorflow/python/kernel_tests/linalg:linear_operator_block_diag_test \ + -//tensorflow/python/kernel_tests/linalg:linear_operator_block_lower_triangular_test \ + -//tensorflow/python/kernel_tests/linalg:linear_operator_kronecker_test \ + -//tensorflow/python/kernel_tests/math_ops:batch_matmul_op_test \ + -//tensorflow/python/kernel_tests/nn_ops:conv_ops_test \ + -//tensorflow/python/kernel_tests/nn_ops:conv2d_backprop_filter_grad_test \ + -//tensorflow/python/kernel_tests/nn_ops:conv3d_backprop_filter_v2_grad_test \ + -//tensorflow/python/kernel_tests/nn_ops:atrous_conv2d_test \ + -//tensorflow/python/ops/parallel_for:math_test" +export TF_PIP_TESTS="test_pip_virtualenv_clean" +export TF_TEST_FILTER_TAGS="-no_oss,-oss_serial,-no_oss_py${py_ver},-gpu,-tpu,-benchmark-test,-v1only,-no_aarch64,-requires-gpu" +export IS_NIGHTLY=0 +export TF_PROJECT_NAME="tensorflow_cpu_aws" +export TF_PIP_TEST_ROOT="pip_test" +export TF_AUDITWHEEL_TARGET_PLAT="manylinux2014" + +source tensorflow/tools/ci_build/builds/pip_new.sh + +# remove duplicate wheel and copy wheel to mounted volume for local access +rm -rf /tensorflow/pip_test/whl/*linux_aarch64.whl && cp -r /tensorflow/pip_test/whl . diff --git a/tensorflow/tools/ci_build/release/common.sh b/tensorflow/tools/ci_build/release/common.sh index dd82501f77ad7e..c510af5b0ad490 100644 --- a/tensorflow/tools/ci_build/release/common.sh +++ b/tensorflow/tools/ci_build/release/common.sh @@ -62,7 +62,12 @@ function install_bazelisk { date case "$(uname -s)" in Darwin) local name=bazelisk-darwin-amd64 ;; - Linux) local name=bazelisk-linux-amd64 ;; + Linux) + case "$(uname -m)" in + x86_64) local name=bazelisk-linux-amd64 ;; + aarch64) local name=bazelisk-linux-arm64 ;; + *) die "Unknown machine type: $(uname -m)" ;; + esac ;; *) die "Unknown OS: $(uname -s)" ;; esac mkdir -p "$HOME/bin" diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 09d82821222418..f3846b0fb2cfc4 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -45,7 +45,7 @@ # result for pip. # Also update tensorflow/tensorflow.bzl and # tensorflow/core/public/version.h -_VERSION = '2.9.0' +_VERSION = '2.9.3' # We use the same setup.py for all tensorflow_* packages and for the nightly @@ -87,7 +87,14 @@ def standard_or_nightly(standard, nightly): 'numpy >= 1.20', 'opt_einsum >= 2.3.2', 'packaging', - 'protobuf >= 3.9.2', + # TODO(b/182876485): Protobuf 3.20 results in linker errors on Windows + # Protobuf 4.0 is binary incompatible with what C++ TF uses. + # We need ~1 quarter to update properly. + # See also: https://github.com/tensorflow/tensorflow/issues/53234 + # See also: https://github.com/protocolbuffers/protobuf/issues/9954 + # See also: https://github.com/tensorflow/tensorflow/issues/56077 + # This is a temporary patch for now, to patch previous TF releases. 
+ 'protobuf >= 3.9.2, < 3.20', 'setuptools', 'six >= 1.12.0', 'termcolor >= 1.1.0', diff --git a/tensorflow/workspace2.bzl b/tensorflow/workspace2.bzl index 77c76aa6e20502..2cb16d253cf483 100644 --- a/tensorflow/workspace2.bzl +++ b/tensorflow/workspace2.bzl @@ -175,9 +175,9 @@ def _tf_repositories(): tf_http_archive( name = "mkl_dnn_v1", build_file = "//third_party/mkl_dnn:mkldnn_v1.BUILD", - sha256 = "9695640f55acd833ddcef4776af15e03446c4655f9296e5074b1b178dd7a4fb2", - strip_prefix = "oneDNN-2.6", - urls = tf_mirror_urls("https://github.com/oneapi-src/oneDNN/archive/refs/tags/v2.6.tar.gz"), + sha256 = "fd47d2470000b5015f6e34134f799683050f81cbc04f53a3b0d323df1bb900bc", + strip_prefix = "oneDNN-2.6.3", + urls = tf_mirror_urls("https://github.com/oneapi-src/oneDNN/archive/refs/tags/v2.6.3.tar.gz"), ) tf_http_archive( @@ -300,10 +300,10 @@ def _tf_repositories(): tf_http_archive( name = "org_sqlite", build_file = "//third_party:sqlite.BUILD", - sha256 = "b65d2b72ce1296bb4314bbca1bede332a0f789b08a17e3e6e2e7ce6e870cde92", - strip_prefix = "sqlite-amalgamation-3370100", + sha256 = "9c99955b21d2374f3a385d67a1f64cbacb1d4130947473d25c77ad609c03b4cd", + strip_prefix = "sqlite-amalgamation-3390400", system_build_file = "//third_party/systemlibs:sqlite.BUILD", - urls = tf_mirror_urls("https://www.sqlite.org/2021/sqlite-amalgamation-3370100.zip"), + urls = tf_mirror_urls("https://www.sqlite.org/2022/sqlite-amalgamation-3390400.zip"), ) tf_http_archive( @@ -483,10 +483,10 @@ def _tf_repositories(): tf_http_archive( name = "curl", build_file = "//third_party:curl.BUILD", - sha256 = "93fb2cd4b880656b4e8589c912a9fd092750166d555166370247f09d18f5d0c0", - strip_prefix = "curl-7.83.1", + sha256 = "3c6893d38d054d4e378267166858698899e9d87258e8ff1419d020c395384535", + strip_prefix = "curl-7.84.0", system_build_file = "//third_party/systemlibs:curl.BUILD", - urls = tf_mirror_urls("https://curl.haxx.se/download/curl-7.83.1.tar.gz"), + urls = tf_mirror_urls("https://curl.haxx.se/download/curl-7.84.0.tar.gz"), ) # WARNING: make sure ncteisen@ and vpai@ are cc-ed on any CL to change the below rule diff --git a/third_party/cpuinfo/cpuinfo.BUILD b/third_party/cpuinfo/cpuinfo.BUILD index eb2937d20ef2a6..e362682b810312 100644 --- a/third_party/cpuinfo/cpuinfo.BUILD +++ b/third_party/cpuinfo/cpuinfo.BUILD @@ -121,6 +121,7 @@ cc_library( ":ios_armv7": COMMON_SRCS + MACH_SRCS + MACH_ARM_SRCS, ":ios_arm64": COMMON_SRCS + MACH_SRCS + MACH_ARM_SRCS, ":ios_arm64e": COMMON_SRCS + MACH_SRCS + MACH_ARM_SRCS, + ":ios_sim_arm64": COMMON_SRCS + MACH_SRCS + MACH_ARM_SRCS, ":watchos_x86_64": COMMON_SRCS + X86_SRCS + MACH_SRCS + MACH_X86_SRCS, ":watchos_x86": COMMON_SRCS + X86_SRCS + MACH_SRCS + MACH_X86_SRCS, ":watchos_armv7k": COMMON_SRCS + MACH_SRCS + MACH_ARM_SRCS, @@ -297,6 +298,14 @@ config_setting( }, ) +config_setting( + name = "ios_sim_arm64", + values = { + "apple_platform_type": "ios", + "cpu": "ios_sim_arm64", + }, +) + config_setting( name = "ios_arm64e", values = { diff --git a/third_party/curl.BUILD b/third_party/curl.BUILD index 37bd3b4ee43f5f..508de07e36b246 100644 --- a/third_party/curl.BUILD +++ b/third_party/curl.BUILD @@ -121,12 +121,15 @@ cc_library( "lib/easyif.h", "lib/easyoptions.c", "lib/easyoptions.h", + "lib/easy_lock.h", "lib/escape.c", "lib/escape.h", "lib/file.c", "lib/file.h", "lib/fileinfo.c", "lib/fileinfo.h", + "lib/fopen.c", + "lib/fopen.h", "lib/formdata.c", "lib/formdata.h", "lib/ftp.c", diff --git a/third_party/mkl_dnn/mkldnn_v1.BUILD b/third_party/mkl_dnn/mkldnn_v1.BUILD index 
3f87ba5d931574..b962519bc51885 100644 --- a/third_party/mkl_dnn/mkldnn_v1.BUILD +++ b/third_party/mkl_dnn/mkldnn_v1.BUILD @@ -137,7 +137,7 @@ template_rule( substitutions = { "@DNNL_VERSION_MAJOR@": "2", "@DNNL_VERSION_MINOR@": "6", - "@DNNL_VERSION_PATCH@": "0", + "@DNNL_VERSION_PATCH@": "3", "@DNNL_VERSION_HASH@": "N/A", }, )