diff --git a/official/README-TPU.md b/official/README-TPU.md
index 28a5a0a73d2..a6031c44f03 100644
--- a/official/README-TPU.md
+++ b/official/README-TPU.md
@@ -26,4 +26,7 @@
 *   [shapemask](vision/detection): An object detection and instance segmentation model using shape priors. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/ZbXgVoc6Rf6mBRlPj0JpLA).
 
 ## Recommendation
+*   [dlrm](recommendation/ranking): [Deep Learning Recommendation Model for
+Personalization and Recommendation Systems](https://arxiv.org/abs/1906.00091).
+*   [dcn v2](recommendation/ranking): [Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535).
 *   [ncf](recommendation): Neural Collaborative Filtering. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/0k3gKjZlR1ewkVTRyLB6IQ).
diff --git a/official/modeling/optimization/configs/optimizer_config.py b/official/modeling/optimization/configs/optimizer_config.py
index 37f9db50f59..7b4de948248 100644
--- a/official/modeling/optimization/configs/optimizer_config.py
+++ b/official/modeling/optimization/configs/optimizer_config.py
@@ -180,11 +180,15 @@ class EMAConfig(BaseOptimizerConfig):
 
   Attributes:
     name: 'str', name of the optimizer.
+    trainable_weights_only: 'bool', if True, only model trainable weights will
+      be updated. Otherwise, all model weights will be updated. This mainly
+      affects batch normalization parameters.
     average_decay: 'float', average decay value.
     start_step: 'int', start step to apply moving average.
     dynamic_decay: 'bool', whether to apply dynamic decay or not.
   """
   name: str = "ExponentialMovingAverage"
+  trainable_weights_only: bool = True
   average_decay: float = 0.99
   start_step: int = 0
   dynamic_decay: bool = True
diff --git a/official/modeling/optimization/ema_optimizer.py b/official/modeling/optimization/ema_optimizer.py
index 5c746ad7d1a..3bf3c3607df 100644
--- a/official/modeling/optimization/ema_optimizer.py
+++ b/official/modeling/optimization/ema_optimizer.py
@@ -48,6 +48,7 @@ class ExponentialMovingAverage(tf.keras.optimizers.Optimizer):
 
   def __init__(self,
                optimizer: tf.keras.optimizers.Optimizer,
+               trainable_weights_only: bool = True,
                average_decay: float = 0.99,
                start_step: int = 0,
                dynamic_decay: bool = True,
@@ -58,6 +59,9 @@ def __init__(self,
     Args:
       optimizer: `tf.keras.optimizers.Optimizer` that will be
         used to compute and apply gradients.
+      trainable_weights_only: 'bool', if True, only model trainable weights will
+        be updated. Otherwise, all model weights will be updated. This mainly
+        affects batch normalization parameters.
       average_decay: float. Decay to use to maintain the moving averages
         of trained variables.
       start_step: int. What step to start the moving average.
@@ -72,6 +76,7 @@ def __init__(self,
     """
     super().__init__(name, **kwargs)
     self._average_decay = average_decay
+    self._trainable_weights_only = trainable_weights_only
     self._start_step = tf.constant(start_step, tf.float32)
     self._dynamic_decay = dynamic_decay
     self._optimizer = optimizer
@@ -81,12 +86,17 @@ def __init__(self,
 
   def shadow_copy(self, model: tf.keras.Model):
     """Creates shadow variables for the given model weights."""
-    for var in model.weights:
+
+    if self._trainable_weights_only:
+      self._model_weights = model.trainable_variables
+    else:
+      self._model_weights = model.variables
+    for var in self._model_weights:
       self.add_slot(var, 'average', initializer='zeros')
+
     self._average_weights = [
-        self.get_slot(var, 'average') for var in model.weights
+        self.get_slot(var, 'average') for var in self._model_weights
     ]
-    self._model_weights = model.weights
 
   @property
   def has_shadow_copy(self):
diff --git a/official/nlp/data/classifier_data_lib.py b/official/nlp/data/classifier_data_lib.py
index 222485a9f4f..2498c327094 100644
--- a/official/nlp/data/classifier_data_lib.py
+++ b/official/nlp/data/classifier_data_lib.py
@@ -1316,8 +1316,8 @@ def _create_examples(self, lines, set_type):
     return examples
 
 
-class SuperGLUERTEProcessor(DataProcessor):
-  """Processor for the RTE dataset (SuperGLUE version)."""
+class SuperGLUEDataProcessor(DataProcessor):
+  """Processor for the SuperGLUE dataset."""
 
   def get_train_examples(self, data_dir):
     """See base class."""
@@ -1334,6 +1334,70 @@ def get_test_examples(self, data_dir):
     return self._create_examples(
         self._read_jsonl(os.path.join(data_dir, "test.jsonl")), "test")
 
+  def _create_examples(self, lines, set_type):
+    """Creates examples for the training/dev/test sets."""
+    raise NotImplementedError()
+
+
+class BoolQProcessor(SuperGLUEDataProcessor):
+  """Processor for the BoolQ dataset (SuperGLUE diagnostics dataset)."""
+
+  def get_labels(self):
+    """See base class."""
+    return ["True", "False"]
+
+  @staticmethod
+  def get_processor_name():
+    """See base class."""
+    return "BoolQ"
+
+  def _create_examples(self, lines, set_type):
+    """Creates examples for the training/dev/test sets."""
+    examples = []
+    for line in lines:
+      guid = "%s-%s" % (set_type, self.process_text_fn(str(line["idx"])))
+      text_a = self.process_text_fn(line["question"])
+      text_b = self.process_text_fn(line["passage"])
+      if set_type == "test":
+        label = "False"
+      else:
+        label = str(line["label"])
+      examples.append(
+          InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+    return examples
+
+
+class CBProcessor(SuperGLUEDataProcessor):
+  """Processor for the CB dataset (SuperGLUE diagnostics dataset)."""
+
+  def get_labels(self):
+    """See base class."""
+    return ["entailment", "neutral", "contradiction"]
+
+  @staticmethod
+  def get_processor_name():
+    """See base class."""
+    return "CB"
+
+  def _create_examples(self, lines, set_type):
+    """Creates examples for the training/dev/test sets."""
+    examples = []
+    for line in lines:
+      guid = "%s-%s" % (set_type, self.process_text_fn(str(line["idx"])))
+      text_a = self.process_text_fn(line["premise"])
+      text_b = self.process_text_fn(line["hypothesis"])
+      if set_type == "test":
+        label = "entailment"
+      else:
+        label = self.process_text_fn(line["label"])
+      examples.append(
+          InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
+    return examples
+
+
+class SuperGLUERTEProcessor(SuperGLUEDataProcessor):
+  """Processor for the RTE dataset (SuperGLUE version)."""
+
   def get_labels(self):
     """See base class."""
     # All datasets are converted to 2-class split, where for 3-class datasets we
diff --git a/official/nlp/data/create_finetuning_data.py b/official/nlp/data/create_finetuning_data.py
index 14b2bbc0463..9d31c9a5000 100644
--- a/official/nlp/data/create_finetuning_data.py
+++ b/official/nlp/data/create_finetuning_data.py
@@ -50,7 +50,7 @@
     "classification_task_name", "MNLI", [
         "AX", "COLA", "IMDB", "MNLI", "MRPC", "PAWS-X", "QNLI", "QQP", "RTE",
         "SST-2", "STS-B", "WNLI", "XNLI", "XTREME-XNLI", "XTREME-PAWS-X",
-        "AX-g", "SUPERGLUE-RTE"
+        "AX-g", "SUPERGLUE-RTE", "CB", "BoolQ"
     ], "The name of the task to train BERT classifier. The "
     "difference between XTREME-XNLI and XNLI is: 1. the format "
     "of input tsv files; 2. the dev set for XTREME is english "
@@ -243,7 +243,11 @@ def generate_classifier_dataset():
         "ax-g":
             classifier_data_lib.AXgProcessor,
         "superglue-rte":
-            classifier_data_lib.SuperGLUERTEProcessor
+            classifier_data_lib.SuperGLUERTEProcessor,
+        "cb":
+            classifier_data_lib.CBProcessor,
+        "boolq":
+            classifier_data_lib.BoolQProcessor,
     }
     task_name = FLAGS.classification_task_name.lower()
     if task_name not in processors:
diff --git a/official/nlp/finetuning/superglue/run_superglue.py b/official/nlp/finetuning/superglue/run_superglue.py
index bac41e0a129..01025a88f93 100644
--- a/official/nlp/finetuning/superglue/run_superglue.py
+++ b/official/nlp/finetuning/superglue/run_superglue.py
@@ -27,6 +27,7 @@
 
 from official.common import distribute_utils
 # Imports registered experiment configs.
+from official.common import registry_imports  # pylint: disable=unused-import
 from official.core import exp_factory
 from official.core import task_factory
 from official.core import train_lib
@@ -64,6 +65,8 @@
 
 AXG_CLASS_NAMES = ['entailment', 'not_entailment']
 RTE_CLASS_NAMES = ['entailment', 'not_entailment']
+CB_CLASS_NAMES = ['entailment', 'neutral', 'contradiction']
+BOOLQ_CLASS_NAMES = ['True', 'False']
 
 
 def _override_exp_config_by_file(exp_config, exp_config_files):
@@ -153,7 +156,9 @@ def _write_submission_file(task, seq_length):
   write_fn = binary_helper.write_superglue_classification
   write_fn_map = {
       'RTE': functools.partial(write_fn, class_names=RTE_CLASS_NAMES),
-      'AX-g': functools.partial(write_fn, class_names=AXG_CLASS_NAMES)
+      'AX-g': functools.partial(write_fn, class_names=AXG_CLASS_NAMES),
+      'CB': functools.partial(write_fn, class_names=CB_CLASS_NAMES),
+      'BoolQ': functools.partial(write_fn, class_names=BOOLQ_CLASS_NAMES)
   }
   logging.info('Predicting %s', FLAGS.test_input_path)
   write_fn_map[FLAGS.task_name](
diff --git a/official/recommendation/ranking/README.md b/official/recommendation/ranking/README.md
index 1d42b4f278d..9c2ca21039f 100644
--- a/official/recommendation/ranking/README.md
+++ b/official/recommendation/ranking/README.md
@@ -16,8 +16,8 @@ When training on TPUs we use
 [TPUEmbedding layer](https://github.com/tensorflow/recommenders/blob/main/tensorflow_recommenders/layers/embedding/tpu_embedding_layer.py)
 for categorical features. TPU embedding supports large embedding tables with
 fast lookup, the size of embedding tables scales linearly with the size of TPU
-pod. We can have up to 96 GB embedding tables for TPU v3-8 and 6.14 TB for
-v3-512 and 24.6 TB for TPU Pod v3-2048.
+pod. We can have up to 90 GB embedding tables for TPU v3-8 and 5.6 TB for
+v3-512 and 22,4 TB for TPU Pod v3-2048.
 
 The Model code is in
 [TensorFlow Recommenders](https://github.com/tensorflow/recommenders/tree/main/tensorflow_recommenders/experimental/models)
@@ -25,16 +25,30 @@ library, while input pipeline, configuration and training loop is here.
 
 ## Prerequisites
 To get started, download the code from TensorFlow models GitHub repository or
-use the pre-installed Google Cloud VM. We also need to install [TensorFlow
-Recommenders](https://www.tensorflow.org/recommenders) library.
+use the pre-installed Google Cloud VM.
 
 ```bash
 git clone https://github.com/tensorflow/models.git
-pip install -r models/official/requirements.txt
 export PYTHONPATH=$PYTHONPATH:$(pwd)/models
 ```
 
-Make sure to use TensorFlow 2.4+.
+We also need to install
+[TensorFlow Recommenders](https://www.tensorflow.org/recommenders) library.
+If you are using [tf-nightly](https://pypi.org/project/tf-nightly/) make
+sure to install
+[tensorflow-recommenders](https://pypi.org/project/tensorflow-recommenders/)
+without its dependancies by passing `--no-deps` argument.
+
+For tf-nightly:
+```bash
+pip install tensorflow-recommenders --no-deps
+```
+
+For stable TensorFlow 2.4+ [releases](https://pypi.org/project/tensorflow/):
+```bash
+pip install tensorflow-recommenders
+```
+
 
 ## Dataset
 
@@ -98,10 +112,10 @@ export EXPERIMENT_NAME=my_experiment_name
 export BUCKET_NAME="gs://my_dlrm_bucket"
 export DATA_DIR="${BUCKET_NAME}/data"
 
-python3 official/recommendation/ranking/main.py --mode=train_and_eval \
+python3 models/official/recommendation/ranking/train.py --mode=train_and_eval \
 --model_dir=${BUCKET_NAME}/model_dirs/${EXPERIMENT_NAME} --params_override="
 runtime:
-    distribution_strategy='tpu'
+    distribution_strategy: 'tpu'
 task:
     use_synthetic_data: false
     train_data:
@@ -125,7 +139,7 @@ trainer:
     checkpoint_interval: 100000
     validation_steps: 5440
     train_steps: 256054
-    steps_per_execution: 1000
+    steps_per_loop: 1000
 "
 ```
 
diff --git a/official/requirements.txt b/official/requirements.txt
index 0c734c580b0..74028adcb55 100644
--- a/official/requirements.txt
+++ b/official/requirements.txt
@@ -12,7 +12,6 @@ tensorflow-hub>=0.6.0
 tensorflow-model-optimization>=0.4.1
 tensorflow-datasets
 tensorflow-addons
-tensorflow-recommenders>=0.5.0
 dataclasses;python_version<"3.7"
 gin-config
 tf_slim>=1.1.0
diff --git a/official/utils/testing/scripts/presubmit.sh b/official/utils/testing/scripts/presubmit.sh
index 954d96df7f8..33eca3cbb41 100755
--- a/official/utils/testing/scripts/presubmit.sh
+++ b/official/utils/testing/scripts/presubmit.sh
@@ -31,8 +31,8 @@ py_test() {
   local exit_code=0
 
   echo "===========Running Python test============"
-
-  for test_file in `find official/ -name '*test.py' -print`
+  # Skipping Ranking tests, TODO(b/189265753) remove it once the issue is fixed.
+  for test_file in `find official/ -name '*test.py' -print | grep -v 'official/recommendation/ranking'`
   do
     echo "####=======Testing ${test_file}=======####"
     ${PY_BINARY} "${test_file}"
diff --git a/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml b/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml
index 3d68f539601..83875d1273a 100644
--- a/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml
+++ b/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml
@@ -80,6 +80,7 @@ trainer:
   optimizer_config:
     ema:
       average_decay: 0.9999
+      trainable_weights_only: false
     learning_rate:
       cosine:
         decay_steps: 73682
diff --git a/official/vision/beta/configs/image_classification.py b/official/vision/beta/configs/image_classification.py
index e80c85f87fd..7044a4c0004 100644
--- a/official/vision/beta/configs/image_classification.py
+++ b/official/vision/beta/configs/image_classification.py
@@ -227,7 +227,8 @@ def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig:
                   }
               },
               'ema': {
-                  'average_decay': 0.9999
+                  'average_decay': 0.9999,
+                  'trainable_weights_only': False,
               },
               'learning_rate': {
                   'type': 'cosine',
diff --git a/official/vision/beta/configs/video_classification.py b/official/vision/beta/configs/video_classification.py
index b6ede36172e..d6d3c9499a1 100644
--- a/official/vision/beta/configs/video_classification.py
+++ b/official/vision/beta/configs/video_classification.py
@@ -254,7 +254,12 @@ def video_classification_ucf101() -> cfg.ExperimentConfig:
           'task.validation_data.is_training != None',
           'task.train_data.num_classes == task.validation_data.num_classes',
       ])
-  add_trainer(config, train_batch_size=64, eval_batch_size=16, train_epochs=100)
+  add_trainer(
+      config,
+      train_batch_size=64,
+      eval_batch_size=16,
+      learning_rate=0.8,
+      train_epochs=100)
   return config
 
 
diff --git a/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py b/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py
index 1dbaae6ebf1..ae66e3797cf 100644
--- a/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py
+++ b/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py
@@ -331,13 +331,13 @@ def get_raw_depths(self, minimum_depth, inputs):
 
     Args:
       minimum_depth: `int` depth of the smallest branch of the FPN.
-      inputs: `dict[str, tf.InputSpec]` of the shape of input args as a dictionary of
-        lists.
+      inputs: `dict[str, tf.InputSpec]` of the shape of input args as a
+        dictionary of lists.
 
     Returns:
       The unscaled depths of the FPN branches.
     """
-    
+
     depths = []
     if len(inputs.keys()) > 3 or self._fpn_filter_scale > 1:
       for i in range(self._min_level, self._max_level + 1):
@@ -386,8 +386,8 @@ def __init__(self,
                kernel_regularizer=None,
                bias_regularizer=None,
                **kwargs):
-    """Yolo Decoder initialization function. A unified model that ties all decoder
-    components into a conditionally build YOLO decder.
+    """Yolo Decoder initialization function. A unified model that ties all
+    decoder components into a conditionally build YOLO decoder.
 
     Args:
       input_specs: `dict[str, tf.InputSpec]`: input specs of each of the inputs
@@ -409,7 +409,7 @@ def __init__(self,
         zero.
       kernel_initializer: kernel_initializer for convolutional layers.
       kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
-      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d.
+      bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D.
       **kwargs: keyword arguments to be passed.
     """
 
diff --git a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py
index 9897def3ad3..02895ff3db4 100644
--- a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py
+++ b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py
@@ -1152,8 +1152,8 @@ def build(self, input_shape):
 
   def call(self, inputs, training=None):
     if self._use_pooling:
-      depth_max = tf.reduce_max(inputs, axis=-1, keep_dims=True)
-      depth_avg = tf.reduce_mean(inputs, axis=-1, keep_dims=True)
+      depth_max = tf.reduce_max(inputs, axis=-1, keepdims=True)
+      depth_avg = tf.reduce_mean(inputs, axis=-1, keepdims=True)
       input_maps = tf.concat([depth_avg, depth_max], axis=-1)
     else:
       input_maps = inputs
@@ -1545,7 +1545,7 @@ def build(self, input_shape):
       elif layer == 'spp':
         self.layers.append(self._spp(self._filters, dark_conv_args))
       elif layer == 'sam':
-        self.layers.append(self._sam(-1, _args))
+        self.layers.append(self._sam(-1, dark_conv_args))
 
     self._lim = len(self.layers)
     super().build(input_shape)