From cee98c903bbeccfffe5636c6fdfb4805edcaa1fc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 24 May 2021 21:08:47 -0700 Subject: [PATCH 01/10] Internal change PiperOrigin-RevId: 375621932 --- official/nlp/finetuning/superglue/run_superglue.py | 1 + 1 file changed, 1 insertion(+) diff --git a/official/nlp/finetuning/superglue/run_superglue.py b/official/nlp/finetuning/superglue/run_superglue.py index bac41e0a129..8797e16e476 100644 --- a/official/nlp/finetuning/superglue/run_superglue.py +++ b/official/nlp/finetuning/superglue/run_superglue.py @@ -27,6 +27,7 @@ from official.common import distribute_utils # Imports registered experiment configs. +from official.common import registry_imports # pylint: disable=unused-import from official.core import exp_factory from official.core import task_factory from official.core import train_lib From 3d03e675d444186a49a665ce2e1be32a24c59215 Mon Sep 17 00:00:00 2001 From: Yeqing Li Date: Tue, 25 May 2021 09:41:10 -0700 Subject: [PATCH 02/10] Updates default learning rate for UCF-101 experiment. PiperOrigin-RevId: 375725859 --- official/vision/beta/configs/video_classification.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/official/vision/beta/configs/video_classification.py b/official/vision/beta/configs/video_classification.py index b6ede36172e..d6d3c9499a1 100644 --- a/official/vision/beta/configs/video_classification.py +++ b/official/vision/beta/configs/video_classification.py @@ -254,7 +254,12 @@ def video_classification_ucf101() -> cfg.ExperimentConfig: 'task.validation_data.is_training != None', 'task.train_data.num_classes == task.validation_data.num_classes', ]) - add_trainer(config, train_batch_size=64, eval_batch_size=16, train_epochs=100) + add_trainer( + config, + train_batch_size=64, + eval_batch_size=16, + learning_rate=0.8, + train_epochs=100) return config From cda3bca5d53b6a09d8c0a3e2952feba297cbc096 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 25 May 2021 09:58:42 -0700 Subject: [PATCH 03/10] Updating READMEs for DLRM Model. PiperOrigin-RevId: 375729667 --- official/README-TPU.md | 3 +++ official/recommendation/ranking/README.md | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/official/README-TPU.md b/official/README-TPU.md index 28a5a0a73d2..a6031c44f03 100644 --- a/official/README-TPU.md +++ b/official/README-TPU.md @@ -26,4 +26,7 @@ * [shapemask](vision/detection): An object detection and instance segmentation model using shape priors. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/ZbXgVoc6Rf6mBRlPj0JpLA). ## Recommendation +* [dlrm](recommendation/ranking): [Deep Learning Recommendation Model for +Personalization and Recommendation Systems](https://arxiv.org/abs/1906.00091). +* [dcn v2](recommendation/ranking): [Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535). * [ncf](recommendation): Neural Collaborative Filtering. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/0k3gKjZlR1ewkVTRyLB6IQ). 
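Patch 02 above only changes the default experiment configuration for UCF-101; the training loop itself is untouched. A quick way to see what the registered experiment now resolves to is to pull it from the experiment factory. The following is a minimal sketch, not part of any patch: it assumes the experiment stays registered under the name `video_classification_ucf101` and that importing the configs module is what registers it, and the printed fields are illustrative.

```python
# Sketch: inspect the UCF-101 defaults produced by add_trainer() in patch 02.
# Assumes the experiment name 'video_classification_ucf101' and that importing
# the configs module registers it with exp_factory.
from official.core import exp_factory
from official.vision.beta.configs import video_classification  # pylint: disable=unused-import

config = exp_factory.get_exp_config('video_classification_ucf101')
print(config.task.train_data.global_batch_size)       # expected: 64, per the patch
print(config.trainer.optimizer_config.learning_rate)  # should carry the new 0.8 default
```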
diff --git a/official/recommendation/ranking/README.md b/official/recommendation/ranking/README.md index 1d42b4f278d..dff361a2378 100644 --- a/official/recommendation/ranking/README.md +++ b/official/recommendation/ranking/README.md @@ -16,8 +16,8 @@ When training on TPUs we use [TPUEmbedding layer](https://github.com/tensorflow/recommenders/blob/main/tensorflow_recommenders/layers/embedding/tpu_embedding_layer.py) for categorical features. TPU embedding supports large embedding tables with fast lookup, the size of embedding tables scales linearly with the size of TPU -pod. We can have up to 96 GB embedding tables for TPU v3-8 and 6.14 TB for -v3-512 and 24.6 TB for TPU Pod v3-2048. +pod. We can have up to 90 GB embedding tables for TPU v3-8 and 5.6 TB for +v3-512 and 22.4 TB for TPU Pod v3-2048. The Model code is in [TensorFlow Recommenders](https://github.com/tensorflow/recommenders/tree/main/tensorflow_recommenders/experimental/models) @@ -30,7 +30,7 @@ Recommenders](https://www.tensorflow.org/recommenders) library. ```bash git clone https://github.com/tensorflow/models.git -pip install -r models/official/requirements.txt +pip install tensorflow-recommenders export PYTHONPATH=$PYTHONPATH:$(pwd)/models ``` @@ -98,10 +98,10 @@ export EXPERIMENT_NAME=my_experiment_name export BUCKET_NAME="gs://my_dlrm_bucket" export DATA_DIR="${BUCKET_NAME}/data" -python3 official/recommendation/ranking/main.py --mode=train_and_eval \ +python3 models/official/recommendation/ranking/train.py --mode=train_and_eval \ --model_dir=${BUCKET_NAME}/model_dirs/${EXPERIMENT_NAME} --params_override=" runtime: - distribution_strategy='tpu' + distribution_strategy: 'tpu' task: use_synthetic_data: false train_data: @@ -125,7 +125,7 @@ trainer: checkpoint_interval: 100000 validation_steps: 5440 train_steps: 256054 - steps_per_execution: 1000 + steps_per_loop: 1000 " ``` From 50905fd236c25fcb4841528cc4f05f8e35a63836 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 25 May 2021 16:56:09 -0700 Subject: [PATCH 04/10] Removing tensorflow recommenders library from requirement.txt. PiperOrigin-RevId: 375826079 --- official/recommendation/ranking/README.md | 22 +++++++++++++++++---- official/requirements.txt | 1 - official/utils/testing/scripts/presubmit.sh | 4 ++-- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/official/recommendation/ranking/README.md b/official/recommendation/ranking/README.md index dff361a2378..9c2ca21039f 100644 --- a/official/recommendation/ranking/README.md +++ b/official/recommendation/ranking/README.md @@ -25,16 +25,30 @@ library, while input pipeline, configuration and training loop is here. ## Prerequisites To get started, download the code from TensorFlow models GitHub repository or -use the pre-installed Google Cloud VM. We also need to install [TensorFlow -Recommenders](https://www.tensorflow.org/recommenders) library. +use the pre-installed Google Cloud VM. ```bash git clone https://github.com/tensorflow/models.git -pip install tensorflow-recommenders export PYTHONPATH=$PYTHONPATH:$(pwd)/models ``` -Make sure to use TensorFlow 2.4+. +We also need to install +[TensorFlow Recommenders](https://www.tensorflow.org/recommenders) library. +If you are using [tf-nightly](https://pypi.org/project/tf-nightly/) make +sure to install +[tensorflow-recommenders](https://pypi.org/project/tensorflow-recommenders/) +without its dependencies by passing `--no-deps` argument. 
+ +For tf-nightly: +```bash +pip install tensorflow-recommenders --no-deps +``` + +For stable TensorFlow 2.4+ [releases](https://pypi.org/project/tensorflow/): +```bash +pip install tensorflow-recommenders +``` + ## Dataset diff --git a/official/requirements.txt b/official/requirements.txt index 0c734c580b0..74028adcb55 100644 --- a/official/requirements.txt +++ b/official/requirements.txt @@ -12,7 +12,6 @@ tensorflow-hub>=0.6.0 tensorflow-model-optimization>=0.4.1 tensorflow-datasets tensorflow-addons -tensorflow-recommenders>=0.5.0 dataclasses;python_version<"3.7" gin-config tf_slim>=1.1.0 diff --git a/official/utils/testing/scripts/presubmit.sh b/official/utils/testing/scripts/presubmit.sh index 954d96df7f8..33eca3cbb41 100755 --- a/official/utils/testing/scripts/presubmit.sh +++ b/official/utils/testing/scripts/presubmit.sh @@ -31,8 +31,8 @@ py_test() { local exit_code=0 echo "===========Running Python test============" - - for test_file in `find official/ -name '*test.py' -print` + # Skipping Ranking tests, TODO(b/189265753) remove it once the issue is fixed. + for test_file in `find official/ -name '*test.py' -print | grep -v 'official/recommendation/ranking'` do echo "####=======Testing ${test_file}=======####" ${PY_BINARY} "${test_file}" From fda53f7875bfe56d00fe0faeb0064ea8fc5ecf60 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 25 May 2021 20:25:28 -0700 Subject: [PATCH 05/10] Internal change PiperOrigin-RevId: 375854504 --- official/nlp/data/classifier_data_lib.py | 86 +++++++++++++++++++ official/nlp/data/create_finetuning_data.py | 8 +- .../nlp/finetuning/superglue/run_superglue.py | 6 +- 3 files changed, 97 insertions(+), 3 deletions(-) diff --git a/official/nlp/data/classifier_data_lib.py b/official/nlp/data/classifier_data_lib.py index 222485a9f4f..168a2ae4390 100644 --- a/official/nlp/data/classifier_data_lib.py +++ b/official/nlp/data/classifier_data_lib.py @@ -1316,6 +1316,92 @@ def _create_examples(self, lines, set_type): return examples +class BoolQProcessor(DataProcessor): + """Processor for the BoolQ dataset (SuperGLUE diagnostics dataset).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_jsonl(os.path.join(data_dir, "train.jsonl")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_jsonl(os.path.join(data_dir, "val.jsonl")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_jsonl(os.path.join(data_dir, "test.jsonl")), "test") + + def get_labels(self): + """See base class.""" + return ["True", "False"] + + @staticmethod + def get_processor_name(): + """See base class.""" + return "BoolQ" + + def _create_examples(self, lines, set_type): + """Creates examples for the training/dev/test sets.""" + examples = [] + for line in lines: + guid = "%s-%s" % (set_type, self.process_text_fn(str(line["idx"]))) + text_a = self.process_text_fn(line["question"]) + text_b = self.process_text_fn(line["passage"]) + if set_type == "test": + label = "False" + else: + label = str(line["label"]) + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class CBProcessor(DataProcessor): + """Processor for the CB dataset (SuperGLUE diagnostics dataset).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_jsonl(os.path.join(data_dir, "train.jsonl")), 
"train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_jsonl(os.path.join(data_dir, "val.jsonl")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_jsonl(os.path.join(data_dir, "test.jsonl")), "test") + + def get_labels(self): + """See base class.""" + return ["entailment", "neutral", "contradiction"] + + @staticmethod + def get_processor_name(): + """See base class.""" + return "CB" + + def _create_examples(self, lines, set_type): + """Creates examples for the training/dev/test sets.""" + examples = [] + for line in lines: + guid = "%s-%s" % (set_type, self.process_text_fn(str(line["idx"]))) + text_a = self.process_text_fn(line["premise"]) + text_b = self.process_text_fn(line["hypothesis"]) + if set_type == "test": + label = "entailment" + else: + label = self.process_text_fn(line["label"]) + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + class SuperGLUERTEProcessor(DataProcessor): """Processor for the RTE dataset (SuperGLUE version).""" diff --git a/official/nlp/data/create_finetuning_data.py b/official/nlp/data/create_finetuning_data.py index 14b2bbc0463..9d31c9a5000 100644 --- a/official/nlp/data/create_finetuning_data.py +++ b/official/nlp/data/create_finetuning_data.py @@ -50,7 +50,7 @@ "classification_task_name", "MNLI", [ "AX", "COLA", "IMDB", "MNLI", "MRPC", "PAWS-X", "QNLI", "QQP", "RTE", "SST-2", "STS-B", "WNLI", "XNLI", "XTREME-XNLI", "XTREME-PAWS-X", - "AX-g", "SUPERGLUE-RTE" + "AX-g", "SUPERGLUE-RTE", "CB", "BoolQ" ], "The name of the task to train BERT classifier. The " "difference between XTREME-XNLI and XNLI is: 1. the format " "of input tsv files; 2. the dev set for XTREME is english " @@ -243,7 +243,11 @@ def generate_classifier_dataset(): "ax-g": classifier_data_lib.AXgProcessor, "superglue-rte": - classifier_data_lib.SuperGLUERTEProcessor + classifier_data_lib.SuperGLUERTEProcessor, + "cb": + classifier_data_lib.CBProcessor, + "boolq": + classifier_data_lib.BoolQProcessor, } task_name = FLAGS.classification_task_name.lower() if task_name not in processors: diff --git a/official/nlp/finetuning/superglue/run_superglue.py b/official/nlp/finetuning/superglue/run_superglue.py index 8797e16e476..01025a88f93 100644 --- a/official/nlp/finetuning/superglue/run_superglue.py +++ b/official/nlp/finetuning/superglue/run_superglue.py @@ -65,6 +65,8 @@ AXG_CLASS_NAMES = ['entailment', 'not_entailment'] RTE_CLASS_NAMES = ['entailment', 'not_entailment'] +CB_CLASS_NAMES = ['entailment', 'neutral', 'contradiction'] +BOOLQ_CLASS_NAMES = ['True', 'False'] def _override_exp_config_by_file(exp_config, exp_config_files): @@ -154,7 +156,9 @@ def _write_submission_file(task, seq_length): write_fn = binary_helper.write_superglue_classification write_fn_map = { 'RTE': functools.partial(write_fn, class_names=RTE_CLASS_NAMES), - 'AX-g': functools.partial(write_fn, class_names=AXG_CLASS_NAMES) + 'AX-g': functools.partial(write_fn, class_names=AXG_CLASS_NAMES), + 'CB': functools.partial(write_fn, class_names=CB_CLASS_NAMES), + 'BoolQ': functools.partial(write_fn, class_names=BOOLQ_CLASS_NAMES) } logging.info('Predicting %s', FLAGS.test_input_path) write_fn_map[FLAGS.task_name]( From 081bee8c681eaf87b89c2ca8b63c0e913f42da95 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 26 May 2021 10:45:03 -0700 Subject: [PATCH 06/10] Internal change PiperOrigin-RevId: 375979776 --- official/nlp/data/classifier_data_lib.py | 46 +++++++----------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/official/nlp/data/classifier_data_lib.py b/official/nlp/data/classifier_data_lib.py index 168a2ae4390..2498c327094 100644 --- a/official/nlp/data/classifier_data_lib.py +++ b/official/nlp/data/classifier_data_lib.py @@ -1316,8 +1316,8 @@ def _create_examples(self, lines, set_type): return examples -class BoolQProcessor(DataProcessor): - """Processor for the BoolQ dataset (SuperGLUE diagnostics dataset).""" +class SuperGLUEDataProcessor(DataProcessor): + """Processor for the SuperGLUE dataset.""" def get_train_examples(self, data_dir): """See base class.""" @@ -1334,6 +1334,14 @@ def get_test_examples(self, data_dir): return self._create_examples( self._read_jsonl(os.path.join(data_dir, "test.jsonl")), "test") + def _create_examples(self, lines, set_type): + """Creates examples for the training/dev/test sets.""" + raise NotImplementedError() + + +class BoolQProcessor(SuperGLUEDataProcessor): + """Processor for the BoolQ dataset (SuperGLUE diagnostics dataset).""" + def get_labels(self): """See base class.""" return ["True", "False"] @@ -1359,24 +1367,9 @@ def _create_examples(self, lines, set_type): return examples -class CBProcessor(DataProcessor): +class CBProcessor(SuperGLUEDataProcessor): """Processor for the CB dataset (SuperGLUE diagnostics dataset).""" - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_jsonl(os.path.join(data_dir, "train.jsonl")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_jsonl(os.path.join(data_dir, "val.jsonl")), "dev") - - def get_test_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_jsonl(os.path.join(data_dir, "test.jsonl")), "test") - def get_labels(self): """See base class.""" return ["entailment", "neutral", "contradiction"] @@ -1402,24 +1395,9 @@ def _create_examples(self, lines, set_type): return examples -class SuperGLUERTEProcessor(DataProcessor): +class SuperGLUERTEProcessor(SuperGLUEDataProcessor): """Processor for the RTE dataset (SuperGLUE version).""" - def get_train_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_jsonl(os.path.join(data_dir, "train.jsonl")), "train") - - def get_dev_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_jsonl(os.path.join(data_dir, "val.jsonl")), "dev") - - def get_test_examples(self, data_dir): - """See base class.""" - return self._create_examples( - self._read_jsonl(os.path.join(data_dir, "test.jsonl")), "test") - def get_labels(self): """See base class.""" # All datasets are converted to 2-class split, where for 3-class datasets we From 27be57eba9566e856fd8ebfa008212b91ff92000 Mon Sep 17 00:00:00 2001 From: Abdullah Rashwan Date: Wed, 26 May 2021 14:41:24 -0700 Subject: [PATCH 07/10] Internal change PiperOrigin-RevId: 376032565 --- .../optimization/configs/optimizer_config.py | 4 ++++ official/modeling/optimization/ema_optimizer.py | 16 +++++++++++++--- .../k400_resnet3drs_50_tpu.yaml | 1 + .../vision/beta/configs/image_classification.py | 3 ++- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/official/modeling/optimization/configs/optimizer_config.py 
b/official/modeling/optimization/configs/optimizer_config.py index 37f9db50f59..7b4de948248 100644 --- a/official/modeling/optimization/configs/optimizer_config.py +++ b/official/modeling/optimization/configs/optimizer_config.py @@ -180,11 +180,15 @@ class EMAConfig(BaseOptimizerConfig): Attributes: name: 'str', name of the optimizer. + trainable_weights_only: 'bool', if True, only model trainable weights will + be updated. Otherwise, all model weights will be updated. This mainly + affects batch normalization parameters. average_decay: 'float', average decay value. start_step: 'int', start step to apply moving average. dynamic_decay: 'bool', whether to apply dynamic decay or not. """ name: str = "ExponentialMovingAverage" + trainable_weights_only: bool = True average_decay: float = 0.99 start_step: int = 0 dynamic_decay: bool = True diff --git a/official/modeling/optimization/ema_optimizer.py b/official/modeling/optimization/ema_optimizer.py index 5c746ad7d1a..3bf3c3607df 100644 --- a/official/modeling/optimization/ema_optimizer.py +++ b/official/modeling/optimization/ema_optimizer.py @@ -48,6 +48,7 @@ class ExponentialMovingAverage(tf.keras.optimizers.Optimizer): def __init__(self, optimizer: tf.keras.optimizers.Optimizer, + trainable_weights_only: bool = True, average_decay: float = 0.99, start_step: int = 0, dynamic_decay: bool = True, @@ -58,6 +59,9 @@ def __init__(self, Args: optimizer: `tf.keras.optimizers.Optimizer` that will be used to compute and apply gradients. + trainable_weights_only: 'bool', if True, only model trainable weights will + be updated. Otherwise, all model weights will be updated. This mainly + affects batch normalization parameters. average_decay: float. Decay to use to maintain the moving averages of trained variables. start_step: int. What step to start the moving average. 
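As a rough illustration of how the new `trainable_weights_only` flag is meant to be used: the following is a sketch only, not part of this patch. It assumes a TF 2.x release contemporary with this change (where `tf.keras.optimizers.Optimizer` is the V2 optimizer with slot support), and the toy model and hyperparameters are made up.

```python
# Sketch: wrap a base optimizer with EMA and shadow all model variables, not
# just the trainable ones, so batch-norm moving statistics are averaged too.
import tensorflow as tf
from official.modeling.optimization import ema_optimizer

model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation='relu', input_shape=(4,)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(1),
])

base = tf.keras.optimizers.SGD(learning_rate=0.1)
optimizer = ema_optimizer.ExponentialMovingAverage(
    base,
    trainable_weights_only=False,  # the flag added in this patch
    average_decay=0.9999)
optimizer.shadow_copy(model)  # creates an 'average' slot per shadowed variable
```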
@@ -72,6 +76,7 @@ def __init__(self, """ super().__init__(name, **kwargs) self._average_decay = average_decay + self._trainable_weights_only = trainable_weights_only self._start_step = tf.constant(start_step, tf.float32) self._dynamic_decay = dynamic_decay self._optimizer = optimizer @@ -81,12 +86,17 @@ def __init__(self, def shadow_copy(self, model: tf.keras.Model): """Creates shadow variables for the given model weights.""" - for var in model.weights: + + if self._trainable_weights_only: + self._model_weights = model.trainable_variables + else: + self._model_weights = model.variables + for var in self._model_weights: self.add_slot(var, 'average', initializer='zeros') + self._average_weights = [ - self.get_slot(var, 'average') for var in model.weights + self.get_slot(var, 'average') for var in self._model_weights ] - self._model_weights = model.weights @property def has_shadow_copy(self): diff --git a/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml b/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml index 3d68f539601..83875d1273a 100644 --- a/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml +++ b/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml @@ -80,6 +80,7 @@ trainer: optimizer_config: ema: average_decay: 0.9999 + trainable_weights_only: false learning_rate: cosine: decay_steps: 73682 diff --git a/official/vision/beta/configs/image_classification.py b/official/vision/beta/configs/image_classification.py index e80c85f87fd..7044a4c0004 100644 --- a/official/vision/beta/configs/image_classification.py +++ b/official/vision/beta/configs/image_classification.py @@ -227,7 +227,8 @@ def image_classification_imagenet_resnetrs() -> cfg.ExperimentConfig: } }, 'ema': { - 'average_decay': 0.9999 + 'average_decay': 0.9999, + 'trainable_weights_only': False, }, 'learning_rate': { 'type': 'cosine', From 63719f08ae3073dede98d0722a096f77891aa965 Mon Sep 17 00:00:00 2001 From: Anirudh Vegesana Date: Mon, 24 May 2021 20:03:15 -0400 Subject: [PATCH 08/10] YOLO Family: Updated model (#9923) * Update YOLO model * Fix some docstrings * Fix docstrings * Address some of Dr. 
Davis' changes * Give descriptive names to the test cases * Fix bugs * Fix YOLO head imports * docstring and variable name updates * docstring and variable name updates * docstring and variable name updates Co-authored-by: vishnubanna Co-authored-by: Vishnu Banna <43182884+vishnubanna@users.noreply.github.com> --- .../beta/projects/yolo/configs/backbones.py | 6 +- .../yolo/configs/darknet_classification.py | 2 +- .../yolo/modeling/backbones/darknet.py | 608 +++++-- .../yolo/modeling/backbones/darknet_test.py | 71 +- .../yolo/modeling/decoders/__init__.py | 0 .../yolo/modeling/decoders/yolo_decoder.py | 484 ++++++ .../modeling/decoders/yolo_decoder_test.py | 154 ++ .../projects/yolo/modeling/heads/__init__.py | 0 .../projects/yolo/modeling/heads/yolo_head.py | 120 ++ .../yolo/modeling/heads/yolo_head_test.py | 77 + .../yolo/modeling/layers/nn_blocks.py | 1500 +++++++++++++---- .../yolo/modeling/layers/nn_blocks_test.py | 201 ++- 12 files changed, 2590 insertions(+), 633 deletions(-) create mode 100644 official/vision/beta/projects/yolo/modeling/decoders/__init__.py create mode 100644 official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py create mode 100644 official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder_test.py create mode 100644 official/vision/beta/projects/yolo/modeling/heads/__init__.py create mode 100644 official/vision/beta/projects/yolo/modeling/heads/yolo_head.py create mode 100644 official/vision/beta/projects/yolo/modeling/heads/yolo_head_test.py diff --git a/official/vision/beta/projects/yolo/configs/backbones.py b/official/vision/beta/projects/yolo/configs/backbones.py index a79cb09e17e..a99141543e1 100644 --- a/official/vision/beta/projects/yolo/configs/backbones.py +++ b/official/vision/beta/projects/yolo/configs/backbones.py @@ -24,11 +24,11 @@ @dataclasses.dataclass -class DarkNet(hyperparams.Config): - """DarkNet config.""" +class Darknet(hyperparams.Config): + """Darknet config.""" model_id: str = "darknet53" @dataclasses.dataclass class Backbone(backbones.Backbone): - darknet: DarkNet = DarkNet() + darknet: Darknet = Darknet() diff --git a/official/vision/beta/projects/yolo/configs/darknet_classification.py b/official/vision/beta/projects/yolo/configs/darknet_classification.py index b33e149d484..ffaf387fac0 100644 --- a/official/vision/beta/projects/yolo/configs/darknet_classification.py +++ b/official/vision/beta/projects/yolo/configs/darknet_classification.py @@ -32,7 +32,7 @@ class ImageClassificationModel(hyperparams.Config): num_classes: int = 0 input_size: List[int] = dataclasses.field(default_factory=list) backbone: backbones.Backbone = backbones.Backbone( - type='darknet', resnet=backbones.DarkNet()) + type='darknet', darknet=backbones.Darknet()) dropout_rate: float = 0.0 norm_activation: common.NormActivation = common.NormActivation() # Adds a BatchNormalization layer pre-GlobalAveragePooling in classification diff --git a/official/vision/beta/projects/yolo/modeling/backbones/darknet.py b/official/vision/beta/projects/yolo/modeling/backbones/darknet.py index 170c6bb7680..db00dfd5ad5 100644 --- a/official/vision/beta/projects/yolo/modeling/backbones/darknet.py +++ b/official/vision/beta/projects/yolo/modeling/backbones/darknet.py @@ -29,44 +29,51 @@ arXiv:1911.11929 -DarkNets Are used mainly for Object detection in: +Darknets are used mainly for object detection in: [1] Joseph Redmon, Ali Farhadi YOLOv3: An Incremental Improvement. 
arXiv:1804.02767 [2] Alexey Bochkovskiy, Chien-Yao Wang, Hong-Yuan Mark Liao YOLOv4: Optimal Speed and Accuracy of Object Detection. arXiv:2004.10934 """ -import collections +import collections import tensorflow as tf from official.modeling import hyperparams from official.vision.beta.modeling.backbones import factory from official.vision.beta.projects.yolo.modeling.layers import nn_blocks +# builder required classes -class BlockConfig(object): - """Get layer config to make code more readable. - Args: - layer: string layer name - stack: the type of layer ordering to use for this specific level - repetitions: integer for the number of times to repeat block - bottelneck: boolean for does this stack have a bottle neck layer - filters: integer for the output depth of the level - pool_size: integer the pool_size of max pool layers - kernel_size: optional integer, for convolution kernel size - strides: integer or tuple to indicate convolution strides - padding: the padding to apply to layers in this stack - activation: string for the activation to use for this stack - route: integer for what level to route from to get the next input - output_name: the name to use for this output - is_output: is this layer an output in the default model +class BlockConfig: + """ + Class to store layer config to make code more readable """ def __init__(self, layer, stack, reps, bottleneck, filters, pool_size, - kernel_size, strides, padding, activation, route, output_name, - is_output): + kernel_size, strides, padding, activation, route, dilation_rate, + output_name, is_output): + """ + Args: + layer: A `str` for layer name. + stack: A `str` for the type of layer ordering to use for this specific + level. + reps: An `int` for the number of times to repeat block. + bottleneck: A `bool` for whether this stack has a bottle neck layer. + filters: An `int` for the output depth of the level. + pool_size: An `int` for the pool_size of max pool layers. + kernel_size: An `int` for convolution kernel size. + strides: A `Union[int, tuple]` that indicates convolution strides. + padding: An `int` for the padding to apply to layers in this stack. + activation: A `str` for the activation to use for this stack. + route: An `int` for the level to route from to get the next input. + dilation_rate: An `int` for the scale used in dialated Darknet. + output_name: A `str` for the name to use for this output. + is_output: A `bool` for whether this layer is an output in the default + model. + """ self.layer = layer self.stack = stack self.repetitions = reps @@ -78,6 +85,7 @@ def __init__(self, layer, stack, reps, bottleneck, filters, pool_size, self.padding = padding self.activation = activation self.route = route + self.dilation_rate = dilation_rate self.output_name = output_name self.is_output = is_output @@ -89,41 +97,40 @@ def build_block_specs(config): return specs -class LayerFactory(object): - """Class for quick look up of default layers. - - Used by darknet to connect, introduce or exit a level. Used in place of an if - condition or switch to make adding new layers easier and to reduce redundant - code. +class LayerBuilder: + """ + class for quick look up of default layers used by darknet to + connect, introduce or exit a level. 
Used in place of an if condition + or switch to make adding new layers easier and to reduce redundant code """ def __init__(self): self._layer_dict = { - "ConvBN": (nn_blocks.ConvBN, self.conv_bn_config_todict), - "MaxPool": (tf.keras.layers.MaxPool2D, self.maxpool_config_todict) + 'ConvBN': (nn_blocks.ConvBN, self.conv_bn_config_todict), + 'MaxPool': (tf.keras.layers.MaxPool2D, self.maxpool_config_todict) } def conv_bn_config_todict(self, config, kwargs): dictvals = { - "filters": config.filters, - "kernel_size": config.kernel_size, - "strides": config.strides, - "padding": config.padding + 'filters': config.filters, + 'kernel_size': config.kernel_size, + 'strides': config.strides, + 'padding': config.padding } dictvals.update(kwargs) return dictvals def darktiny_config_todict(self, config, kwargs): - dictvals = {"filters": config.filters, "strides": config.strides} + dictvals = {'filters': config.filters, 'strides': config.strides} dictvals.update(kwargs) return dictvals def maxpool_config_todict(self, config, kwargs): return { - "pool_size": config.pool_size, - "strides": config.strides, - "padding": config.padding, - "name": kwargs["name"] + 'pool_size': config.pool_size, + 'strides': config.strides, + 'padding': config.padding, + 'name': kwargs['name'] } def __call__(self, config, kwargs): @@ -134,90 +141,259 @@ def __call__(self, config, kwargs): # model configs LISTNAMES = [ - "default_layer_name", "level_type", "number_of_layers_in_level", - "bottleneck", "filters", "kernal_size", "pool_size", "strides", "padding", - "default_activation", "route", "level/name", "is_output" + 'default_layer_name', 'level_type', 'number_of_layers_in_level', + 'bottleneck', 'filters', 'kernal_size', 'pool_size', 'strides', 'padding', + 'default_activation', 'route', 'dilation', 'level/name', 'is_output' ] -# pylint: disable=line-too-long CSPDARKNET53 = { - "list_names": LISTNAMES, - "splits": {"backbone_split": 106, - "neck_split": 138}, - "backbone": [ - ["ConvBN", None, 1, False, 32, None, 3, 1, "same", "mish", -1, 0, False], - ["DarkRes", "csp", 1, True, 64, None, None, None, None, "mish", -1, 1, False], - ["DarkRes", "csp", 2, False, 128, None, None, None, None, "mish", -1, 2, False], - ["DarkRes", "csp", 8, False, 256, None, None, None, None, "mish", -1, 3, True], - ["DarkRes", "csp", 8, False, 512, None, None, None, None, "mish", -1, 4, True], - ["DarkRes", "csp", 4, False, 1024, None, None, None, None, "mish", -1, 5, True], + 'list_names': + LISTNAMES, + 'splits': { + 'backbone_split': 106, + 'neck_split': 132 + }, + 'backbone': [ + [ + 'ConvBN', None, 1, False, 32, None, 3, 1, 'same', 'mish', -1, 1, 0, + False + ], + [ + 'DarkRes', 'csp', 1, True, 64, None, None, None, None, 'mish', -1, + 1, 1, False + ], + [ + 'DarkRes', 'csp', 2, False, 128, None, None, None, None, 'mish', -1, + 1, 2, False + ], + [ + 'DarkRes', 'csp', 8, False, 256, None, None, None, None, 'mish', -1, + 1, 3, True + ], + [ + 'DarkRes', 'csp', 8, False, 512, None, None, None, None, 'mish', -1, + 2, 4, True + ], + [ + 'DarkRes', 'csp', 4, False, 1024, None, None, None, None, 'mish', + -1, 4, 5, True + ], + ] +} + +CSPADARKNET53 = { + 'list_names': + LISTNAMES, + 'splits': { + 'backbone_split': 100, + 'neck_split': 135 + }, + 'backbone': [ + [ + 'ConvBN', None, 1, False, 32, None, 3, 1, 'same', 'mish', -1, 1, 0, + False + ], + [ + 'DarkRes', 'residual', 1, True, 64, None, None, None, None, 'mish', + -1, 1, 1, False + ], + [ + 'DarkRes', 'csp', 2, False, 128, None, None, None, None, 'mish', -1, + 1, 2, False + ], + [ + 
'DarkRes', 'csp', 8, False, 256, None, None, None, None, 'mish', -1, + 1, 3, True + ], + [ + 'DarkRes', 'csp', 8, False, 512, None, None, None, None, 'mish', -1, + 2, 4, True + ], + [ + 'DarkRes', 'csp', 4, False, 1024, None, None, None, None, 'mish', + -1, 4, 5, True + ], + ] +} + +LARGECSP53 = { + 'list_names': + LISTNAMES, + 'splits': { + 'backbone_split': 100, + 'neck_split': 135 + }, + 'backbone': [ + [ + 'ConvBN', None, 1, False, 32, None, 3, 1, 'same', 'mish', -1, 1, 0, + False + ], + [ + 'DarkRes', 'csp', 1, True, 64, None, None, None, None, 'mish', -1, + 1, 1, False + ], + [ + 'DarkRes', 'csp', 3, False, 128, None, None, None, None, 'mish', -1, + 1, 2, False + ], + [ + 'DarkRes', 'csp', 15, False, 256, None, None, None, None, 'mish', + -1, 1, 3, True + ], + [ + 'DarkRes', 'csp', 15, False, 512, None, None, None, None, 'mish', + -1, 2, 4, True + ], + [ + 'DarkRes', 'csp', 7, False, 1024, None, None, None, None, 'mish', + -1, 4, 5, True + ], + [ + 'DarkRes', 'csp', 7, False, 1024, None, None, None, None, 'mish', + -1, 8, 6, True + ], + [ + 'DarkRes', 'csp', 7, False, 1024, None, None, None, None, 'mish', + -1, 16, 7, True + ], ] } DARKNET53 = { - "list_names": LISTNAMES, - "splits": {"backbone_split": 76}, - "backbone": [ - ["ConvBN", None, 1, False, 32, None, 3, 1, "same", "leaky", -1, 0, False], - ["DarkRes", "residual", 1, True, 64, None, None, None, None, "leaky", -1, 1, False], - ["DarkRes", "residual", 2, False, 128, None, None, None, None, "leaky", -1, 2, False], - ["DarkRes", "residual", 8, False, 256, None, None, None, None, "leaky", -1, 3, True], - ["DarkRes", "residual", 8, False, 512, None, None, None, None, "leaky", -1, 4, True], - ["DarkRes", "residual", 4, False, 1024, None, None, None, None, "leaky", -1, 5, True], + 'list_names': + LISTNAMES, + 'splits': { + 'backbone_split': 76 + }, + 'backbone': [ + [ + 'ConvBN', None, 1, False, 32, None, 3, 1, 'same', 'leaky', -1, 1, 0, + False + ], + [ + 'DarkRes', 'residual', 1, True, 64, None, None, None, None, 'leaky', + -1, 1, 1, False + ], + [ + 'DarkRes', 'residual', 2, False, 128, None, None, None, None, + 'leaky', -1, 1, 2, False + ], + [ + 'DarkRes', 'residual', 8, False, 256, None, None, None, None, + 'leaky', -1, 1, 3, True + ], + [ + 'DarkRes', 'residual', 8, False, 512, None, None, None, None, + 'leaky', -1, 2, 4, True + ], + [ + 'DarkRes', 'residual', 4, False, 1024, None, None, None, None, + 'leaky', -1, 4, 5, True + ], ] } CSPDARKNETTINY = { - "list_names": LISTNAMES, - "splits": {"backbone_split": 28}, - "backbone": [ - ["ConvBN", None, 1, False, 32, None, 3, 2, "same", "leaky", -1, 0, False], - ["ConvBN", None, 1, False, 64, None, 3, 2, "same", "leaky", -1, 1, False], - ["CSPTiny", "csp_tiny", 1, False, 64, None, 3, 2, "same", "leaky", -1, 2, False], - ["CSPTiny", "csp_tiny", 1, False, 128, None, 3, 2, "same", "leaky", -1, 3, False], - ["CSPTiny", "csp_tiny", 1, False, 256, None, 3, 2, "same", "leaky", -1, 4, True], - ["ConvBN", None, 1, False, 512, None, 3, 1, "same", "leaky", -1, 5, True], + 'list_names': + LISTNAMES, + 'splits': { + 'backbone_split': 28 + }, + 'backbone': [ + [ + 'ConvBN', None, 1, False, 32, None, 3, 2, 'same', 'leaky', -1, 1, 0, + False + ], + [ + 'ConvBN', None, 1, False, 64, None, 3, 2, 'same', 'leaky', -1, 1, 1, + False + ], + [ + 'CSPTiny', 'csp_tiny', 1, False, 64, None, 3, 2, 'same', 'leaky', + -1, 1, 2, False + ], + [ + 'CSPTiny', 'csp_tiny', 1, False, 128, None, 3, 2, 'same', 'leaky', + -1, 1, 3, False + ], + [ + 'CSPTiny', 'csp_tiny', 1, False, 256, None, 3, 2, 'same', 'leaky', + 
-1, 1, 4, True + ], + [ + 'ConvBN', None, 1, False, 512, None, 3, 1, 'same', 'leaky', -1, 1, + 5, True + ], ] } DARKNETTINY = { - "list_names": LISTNAMES, - "splits": {"backbone_split": 14}, - "backbone": [ - ["ConvBN", None, 1, False, 16, None, 3, 1, "same", "leaky", -1, 0, False], - ["DarkTiny", "tiny", 1, True, 32, None, 3, 2, "same", "leaky", -1, 1, False], - ["DarkTiny", "tiny", 1, True, 64, None, 3, 2, "same", "leaky", -1, 2, False], - ["DarkTiny", "tiny", 1, False, 128, None, 3, 2, "same", "leaky", -1, 3, False], - ["DarkTiny", "tiny", 1, False, 256, None, 3, 2, "same", "leaky", -1, 4, True], - ["DarkTiny", "tiny", 1, False, 512, None, 3, 2, "same", "leaky", -1, 5, False], - ["DarkTiny", "tiny", 1, False, 1024, None, 3, 1, "same", "leaky", -1, 5, True], + 'list_names': + LISTNAMES, + 'splits': { + 'backbone_split': 14 + }, + 'backbone': [ + [ + 'ConvBN', None, 1, False, 16, None, 3, 1, 'same', 'leaky', -1, 1, 0, + False + ], + [ + 'DarkTiny', 'tiny', 1, True, 32, None, 3, 2, 'same', 'leaky', -1, 1, + 1, False + ], + [ + 'DarkTiny', 'tiny', 1, True, 64, None, 3, 2, 'same', 'leaky', -1, 1, + 2, False + ], + [ + 'DarkTiny', 'tiny', 1, False, 128, None, 3, 2, 'same', 'leaky', -1, + 1, 3, False + ], + [ + 'DarkTiny', 'tiny', 1, False, 256, None, 3, 2, 'same', 'leaky', -1, + 1, 4, True + ], + [ + 'DarkTiny', 'tiny', 1, False, 512, None, 3, 2, 'same', 'leaky', -1, + 1, 5, False + ], + [ + 'DarkTiny', 'tiny', 1, False, 1024, None, 3, 1, 'same', 'leaky', -1, + 1, 5, True + ], ] } -# pylint: enable=line-too-long BACKBONES = { - "darknettiny": DARKNETTINY, - "darknet53": DARKNET53, - "cspdarknet53": CSPDARKNET53, - "cspdarknettiny": CSPDARKNETTINY + 'darknettiny': DARKNETTINY, + 'darknet53': DARKNET53, + 'cspdarknet53': CSPDARKNET53, + 'altered_cspdarknet53': CSPADARKNET53, + 'cspdarknettiny': CSPDARKNETTINY, + 'csp-large': LARGECSP53, } -@tf.keras.utils.register_keras_serializable(package="yolo") +@tf.keras.utils.register_keras_serializable(package='yolo') class Darknet(tf.keras.Model): - """Darknet backbone.""" + """ The Darknet backbone architecture """ def __init__( self, - model_id="darknet53", + model_id='darknet53', input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]), min_level=None, max_level=5, + width_scale=1.0, + depth_scale=1.0, + csp_level_mod=(), activation=None, use_sync_bn=False, norm_momentum=0.99, norm_epsilon=0.001, - kernel_initializer="glorot_uniform", + dilate=False, + kernel_initializer='glorot_uniform', kernel_regularizer=None, bias_regularizer=None, **kwargs): @@ -227,12 +403,13 @@ def __init__( self._model_name = model_id self._splits = splits self._input_shape = input_specs - self._registry = LayerFactory() + self._registry = LayerBuilder() # default layer look up self._min_size = min_level self._max_size = max_level self._output_specs = None + self._csp_level_mod = set(csp_level_mod) self._kernel_initializer = kernel_initializer self._bias_regularizer = bias_regularizer @@ -241,16 +418,20 @@ def __init__( self._use_sync_bn = use_sync_bn self._activation = activation self._kernel_regularizer = kernel_regularizer + self._dilate = dilate + self._width_scale = width_scale + self._depth_scale = depth_scale self._default_dict = { - "kernel_initializer": self._kernel_initializer, - "kernel_regularizer": self._kernel_regularizer, - "bias_regularizer": self._bias_regularizer, - "norm_momentum": self._norm_momentum, - "norm_epsilon": self._norm_epislon, - "use_sync_bn": self._use_sync_bn, - "activation": self._activation, - "name": None + 'kernel_initializer': 
self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epislon, + 'use_sync_bn': self._use_sync_bn, + 'activation': self._activation, + 'dilation_rate': 1, + 'name': None } inputs = tf.keras.layers.Input(shape=self._input_shape.shape[1:]) @@ -273,33 +454,39 @@ def _build_struct(self, net, inputs): endpoints = collections.OrderedDict() stack_outputs = [inputs] for i, config in enumerate(net): + if config.output_name > self._max_size: + break + if config.output_name in self._csp_level_mod: + config.stack = 'residual' + + config.filters = int(config.filters * self._width_scale) + config.repetitions = int(config.repetitions * self._depth_scale) + if config.stack is None: - x = self._build_block(stack_outputs[config.route], - config, - name=f"{config.layer}_{i}") + x = self._build_block( + stack_outputs[config.route], config, name=f'{config.layer}_{i}') stack_outputs.append(x) - elif config.stack == "residual": - x = self._residual_stack(stack_outputs[config.route], - config, - name=f"{config.layer}_{i}") + elif config.stack == 'residual': + x = self._residual_stack( + stack_outputs[config.route], config, name=f'{config.layer}_{i}') stack_outputs.append(x) - elif config.stack == "csp": - x = self._csp_stack(stack_outputs[config.route], - config, - name=f"{config.layer}_{i}") + elif config.stack == 'csp': + x = self._csp_stack( + stack_outputs[config.route], config, name=f'{config.layer}_{i}') stack_outputs.append(x) - elif config.stack == "csp_tiny": - x_pass, x = self._csp_tiny_stack(stack_outputs[config.route], - config, name=f"{config.layer}_{i}") + elif config.stack == 'csp_tiny': + x_pass, x = self._csp_tiny_stack( + stack_outputs[config.route], config, name=f'{config.layer}_{i}') stack_outputs.append(x_pass) - elif config.stack == "tiny": - x = self._tiny_stack(stack_outputs[config.route], - config, - name=f"{config.layer}_{i}") + elif config.stack == 'tiny': + x = self._tiny_stack( + stack_outputs[config.route], config, name=f'{config.layer}_{i}') stack_outputs.append(x) if (config.is_output and self._min_size is None): endpoints[str(config.output_name)] = x - elif self._min_size is not None and config.output_name >= self._min_size and config.output_name <= self._max_size: + elif (self._min_size is not None and + config.output_name >= self._min_size and + config.output_name <= self._max_size): endpoints[str(config.output_name)] = x self._output_specs = {l: endpoints[l].get_shape() for l in endpoints.keys()} @@ -308,8 +495,7 @@ def _build_struct(self, net, inputs): def _get_activation(self, activation): if self._activation is None: return activation - else: - return self._activation + return self._activation def _csp_stack(self, inputs, config, name): if config.bottleneck: @@ -320,86 +506,135 @@ def _csp_stack(self, inputs, config, name): csp_filter_scale = 2 residual_filter_scale = 1 scale_filters = 2 - self._default_dict["activation"] = self._get_activation(config.activation) - self._default_dict["name"] = f"{name}_csp_down" - x, x_route = nn_blocks.CSPRoute(filters=config.filters, - filter_scale=csp_filter_scale, - downsample=True, - **self._default_dict)(inputs) - for i in range(config.repetitions): - self._default_dict["name"] = f"{name}_{i}" - x = nn_blocks.DarkResidual(filters=config.filters // scale_filters, - filter_scale=residual_filter_scale, - **self._default_dict)(x) - - self._default_dict["name"] = f"{name}_csp_connect" - output = 
nn_blocks.CSPConnect(filters=config.filters, - filter_scale=csp_filter_scale, - **self._default_dict)([x, x_route]) - self._default_dict["activation"] = self._activation - self._default_dict["name"] = None + self._default_dict['activation'] = self._get_activation(config.activation) + self._default_dict['name'] = f'{name}_csp_down' + if self._dilate: + self._default_dict['dilation_rate'] = config.dilation_rate + else: + self._default_dict['dilation_rate'] = 1 + + # swap/add dilation + x, x_route = nn_blocks.CSPRoute( + filters=config.filters, + filter_scale=csp_filter_scale, + downsample=True, + **self._default_dict)( + inputs) + + dilated_reps = config.repetitions - self._default_dict['dilation_rate'] // 2 + for i in range(dilated_reps): + self._default_dict['name'] = f'{name}_{i}' + x = nn_blocks.DarkResidual( + filters=config.filters // scale_filters, + filter_scale=residual_filter_scale, + **self._default_dict)( + x) + + for i in range(dilated_reps, config.repetitions): + self._default_dict[ + 'dilation_rate'] = self._default_dict['dilation_rate'] // 2 + self._default_dict[ + 'name'] = f"{name}_{i}_degridded_{self._default_dict['dilation_rate']}" + x = nn_blocks.DarkResidual( + filters=config.filters // scale_filters, + filter_scale=residual_filter_scale, + **self._default_dict)( + x) + + self._default_dict['name'] = f'{name}_csp_connect' + output = nn_blocks.CSPConnect( + filters=config.filters, + filter_scale=csp_filter_scale, + **self._default_dict)([x, x_route]) + self._default_dict['activation'] = self._activation + self._default_dict['name'] = None return output def _csp_tiny_stack(self, inputs, config, name): - self._default_dict["activation"] = self._get_activation(config.activation) - self._default_dict["name"] = f"{name}_csp_tiny" - x, x_route = nn_blocks.CSPTiny(filters=config.filters, - **self._default_dict)(inputs) - self._default_dict["activation"] = self._activation - self._default_dict["name"] = None + self._default_dict['activation'] = self._get_activation(config.activation) + self._default_dict['name'] = f'{name}_csp_tiny' + x, x_route = nn_blocks.CSPTiny( + filters=config.filters, **self._default_dict)( + inputs) + self._default_dict['activation'] = self._activation + self._default_dict['name'] = None return x, x_route def _tiny_stack(self, inputs, config, name): - x = tf.keras.layers.MaxPool2D(pool_size=2, - strides=config.strides, - padding="same", - data_format=None, - name=f"{name}_tiny/pool")(inputs) - self._default_dict["activation"] = self._get_activation(config.activation) - self._default_dict["name"] = f"{name}_tiny/conv" + x = tf.keras.layers.MaxPool2D( + pool_size=2, + strides=config.strides, + padding='same', + data_format=None, + name=f'{name}_tiny/pool')( + inputs) + self._default_dict['activation'] = self._get_activation(config.activation) + self._default_dict['name'] = f'{name}_tiny/conv' x = nn_blocks.ConvBN( filters=config.filters, kernel_size=(3, 3), strides=(1, 1), - padding="same", + padding='same', **self._default_dict)( x) - self._default_dict["activation"] = self._activation - self._default_dict["name"] = None + self._default_dict['activation'] = self._activation + self._default_dict['name'] = None return x def _residual_stack(self, inputs, config, name): - self._default_dict["activation"] = self._get_activation(config.activation) - self._default_dict["name"] = f"{name}_residual_down" - x = nn_blocks.DarkResidual(filters=config.filters, - downsample=True, - **self._default_dict)(inputs) - for i in range(config.repetitions - 1): - 
self._default_dict["name"] = f"{name}_{i}" - x = nn_blocks.DarkResidual(filters=config.filters, - **self._default_dict)(x) - self._default_dict["activation"] = self._activation - self._default_dict["name"] = None + self._default_dict['activation'] = self._get_activation(config.activation) + self._default_dict['name'] = f'{name}_residual_down' + if self._dilate: + self._default_dict['dilation_rate'] = config.dilation_rate + if config.repetitions < 8: + config.repetitions += 2 + else: + self._default_dict['dilation_rate'] = 1 + + x = nn_blocks.DarkResidual( + filters=config.filters, downsample=True, **self._default_dict)( + inputs) + + dilated_reps = config.repetitions - \ + (self._default_dict['dilation_rate'] // 2) - 1 + for i in range(dilated_reps): + self._default_dict['name'] = f'{name}_{i}' + x = nn_blocks.DarkResidual( + filters=config.filters, **self._default_dict)( + x) + + for i in range(dilated_reps, config.repetitions - 1): + self._default_dict[ + 'dilation_rate'] = self._default_dict['dilation_rate'] // 2 + self._default_dict[ + 'name'] = f"{name}_{i}_degridded_{self._default_dict['dilation_rate']}" + x = nn_blocks.DarkResidual( + filters=config.filters, **self._default_dict)( + x) + + self._default_dict['activation'] = self._activation + self._default_dict['name'] = None + self._default_dict['dilation_rate'] = 1 return x def _build_block(self, inputs, config, name): x = inputs i = 0 - self._default_dict["activation"] = self._get_activation(config.activation) + self._default_dict['activation'] = self._get_activation(config.activation) while i < config.repetitions: - self._default_dict["name"] = f"{name}_{i}" + self._default_dict['name'] = f'{name}_{i}' layer = self._registry(config, self._default_dict) x = layer(x) i += 1 - self._default_dict["activation"] = self._activation - self._default_dict["name"] = None + self._default_dict['activation'] = self._activation + self._default_dict['name'] = None return x @staticmethod def get_model_config(name): name = name.lower() - backbone = BACKBONES[name]["backbone"] - splits = BACKBONES[name]["splits"] + backbone = BACKBONES[name]['backbone'] + splits = BACKBONES[name]['splits'] return build_block_specs(backbone), splits @property @@ -412,35 +647,42 @@ def from_config(cls, config, custom_objects=None): def get_config(self): layer_config = { - "model_id": self._model_name, - "min_level": self._min_size, - "max_level": self._max_size, - "kernel_initializer": self._kernel_initializer, - "kernel_regularizer": self._kernel_regularizer, - "bias_regularizer": self._bias_regularizer, - "norm_momentum": self._norm_momentum, - "norm_epsilon": self._norm_epislon, - "use_sync_bn": self._use_sync_bn, - "activation": self._activation + 'model_id': self._model_name, + 'min_level': self._min_size, + 'max_level': self._max_size, + 'kernel_initializer': self._kernel_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'bias_regularizer': self._bias_regularizer, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epislon, + 'use_sync_bn': self._use_sync_bn, + 'activation': self._activation, } return layer_config -@factory.register_backbone_builder("darknet") +@factory.register_backbone_builder('darknet') def build_darknet( input_specs: tf.keras.layers.InputSpec, backbone_config: hyperparams.Config, norm_activation_config: hyperparams.Config, l2_regularizer: tf.keras.regularizers.Regularizer = None) -> tf.keras.Model: - """Builds darknet backbone.""" - backbone_cfg = backbone_config.get() + backbone_cfg = 
model_config.backbone.get() + norm_activation_config = model_config.norm_activation + model = Darknet( model_id=backbone_cfg.model_id, - input_shape=input_specs, + min_level=model_config.min_level, + max_level=model_config.max_level, + input_specs=input_specs, + dilate=backbone_cfg.dilate, + width_scale=backbone_cfg.width_scale, + depth_scale=backbone_cfg.depth_scale, activation=norm_activation_config.activation, use_sync_bn=norm_activation_config.use_sync_bn, norm_momentum=norm_activation_config.norm_momentum, norm_epsilon=norm_activation_config.norm_epsilon, kernel_regularizer=l2_regularizer) + model.summary() return model diff --git a/official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py b/official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py index 76c595f2dd7..8678c2c93e6 100644 --- a/official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py +++ b/official/vision/beta/projects/yolo/modeling/backbones/darknet_test.py @@ -13,7 +13,7 @@ # limitations under the License. # Lint as: python3 -"""Tests for resnet.""" +"""Tests for yolo.""" from absl.testing import parameterized import numpy as np @@ -24,40 +24,53 @@ from official.vision.beta.projects.yolo.modeling.backbones import darknet -class DarkNetTest(parameterized.TestCase, tf.test.TestCase): +class DarknetTest(parameterized.TestCase, tf.test.TestCase): @parameterized.parameters( - (224, "darknet53", 2, 1), - (224, "darknettiny", 1, 2), - (224, "cspdarknettiny", 1, 1), - (224, "cspdarknet53", 2, 1), + (224, 'darknet53', 2, 1, True), + (224, 'darknettiny', 1, 2, False), + (224, 'cspdarknettiny', 1, 1, False), + (224, 'cspdarknet53', 2, 1, True), ) - def test_network_creation(self, input_size, model_id, - endpoint_filter_scale, scale_final): + def test_network_creation(self, input_size, model_id, endpoint_filter_scale, + scale_final, dilate): """Test creation of ResNet family models.""" - tf.keras.backend.set_image_data_format("channels_last") + tf.keras.backend.set_image_data_format('channels_last') - network = darknet.Darknet(model_id=model_id, min_level=3, max_level=5) + network = darknet.Darknet( + model_id=model_id, min_level=3, max_level=5, dilate=dilate) self.assertEqual(network.model_id, model_id) inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) endpoints = network(inputs) - self.assertAllEqual( - [1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale], - endpoints["3"].shape.as_list()) - self.assertAllEqual( - [1, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale], - endpoints["4"].shape.as_list()) - self.assertAllEqual([ - 1, input_size / 2**5, input_size / 2**5, - 512 * endpoint_filter_scale * scale_final - ], endpoints["5"].shape.as_list()) + if dilate: + self.assertAllEqual([ + 1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale + ], endpoints['3'].shape.as_list()) + self.assertAllEqual([ + 1, input_size / 2**3, input_size / 2**3, 256 * endpoint_filter_scale + ], endpoints['4'].shape.as_list()) + self.assertAllEqual([ + 1, input_size / 2**3, input_size / 2**3, + 512 * endpoint_filter_scale * scale_final + ], endpoints['5'].shape.as_list()) + else: + self.assertAllEqual([ + 1, input_size / 2**3, input_size / 2**3, 128 * endpoint_filter_scale + ], endpoints['3'].shape.as_list()) + self.assertAllEqual([ + 1, input_size / 2**4, input_size / 2**4, 256 * endpoint_filter_scale + ], endpoints['4'].shape.as_list()) + self.assertAllEqual([ + 1, input_size / 2**5, input_size / 2**5, + 512 * endpoint_filter_scale * 
scale_final + ], endpoints['5'].shape.as_list()) @combinations.generate( combinations.combine( strategy=[ - strategy_combinations.cloud_tpu_strategy, + strategy_combinations.tpu_strategy, strategy_combinations.one_device_strategy_gpu, ], use_sync_bn=[False, True], @@ -66,20 +79,20 @@ def test_sync_bn_multiple_devices(self, strategy, use_sync_bn): """Test for sync bn on TPU and GPU devices.""" inputs = np.random.rand(1, 224, 224, 3) - tf.keras.backend.set_image_data_format("channels_last") + tf.keras.backend.set_image_data_format('channels_last') with strategy.scope(): - network = darknet.Darknet(model_id="darknet53", min_size=3, max_size=5) + network = darknet.Darknet(model_id='darknet53', min_size=3, max_size=5) _ = network(inputs) @parameterized.parameters(1, 3, 4) def test_input_specs(self, input_dim): """Test different input feature dimensions.""" - tf.keras.backend.set_image_data_format("channels_last") + tf.keras.backend.set_image_data_format('channels_last') input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim]) network = darknet.Darknet( - model_id="darknet53", min_level=3, max_level=5, input_specs=input_specs) + model_id='darknet53', min_level=3, max_level=5, input_specs=input_specs) inputs = tf.keras.Input(shape=(224, 224, input_dim), batch_size=1) _ = network(inputs) @@ -87,14 +100,14 @@ def test_input_specs(self, input_dim): def test_serialize_deserialize(self): # Create a network object that sets all of its config options. kwargs = dict( - model_id="darknet53", + model_id='darknet53', min_level=3, max_level=5, use_sync_bn=False, - activation="relu", + activation='relu', norm_momentum=0.99, norm_epsilon=0.001, - kernel_initializer="VarianceScaling", + kernel_initializer='VarianceScaling', kernel_regularizer=None, bias_regularizer=None, ) @@ -113,5 +126,5 @@ def test_serialize_deserialize(self): self.assertAllEqual(network.get_config(), new_network.get_config()) -if __name__ == "__main__": +if __name__ == '__main__': tf.test.main() diff --git a/official/vision/beta/projects/yolo/modeling/decoders/__init__.py b/official/vision/beta/projects/yolo/modeling/decoders/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py b/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py new file mode 100644 index 00000000000..1dbaae6ebf1 --- /dev/null +++ b/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py @@ -0,0 +1,484 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Feature Pyramid Network and Path Aggregation variants used in YOLO""" + +import tensorflow as tf +from official.vision.beta.projects.yolo.modeling.layers import nn_blocks + + +@tf.keras.utils.register_keras_serializable(package='yolo') +class _IdentityRoute(tf.keras.layers.Layer): + + def __init__(self, **kwargs): + """Private class to mirror the outputs of blocks in nn_blocks for an easier + programatic generation of the feature pyramid network""" + + super().__init__(**kwargs) + + def call(self, inputs): # pylint: disable=arguments-differ + return None, inputs + + +@tf.keras.utils.register_keras_serializable(package='yolo') +class YoloFPN(tf.keras.layers.Layer): + """YOLO Feature pyramid network.""" + + def __init__(self, + fpn_depth=4, + use_spatial_attention=False, + csp_stack=False, + activation='leaky', + fpn_filter_scale=1, + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_initializer='glorot_uniform', + kernel_regularizer=None, + bias_regularizer=None, + **kwargs): + """Yolo FPN initialization function (Yolo V4). + + Args: + fpn_depth: `int`, number of layers to use in each FPN path + if you choose to use an FPN. + use_spatial_attention: `bool`, use the spatial attention module. + csp_stack: `bool`, CSPize the FPN. + activation: `str`, the activation function to use typically leaky or mish. + fpn_filter_scale: `int`, scaling factor for the FPN filters. + use_sync_bn: if True, use synchronized batch normalization. + norm_momentum: `float`, normalization momentum for the moving average. + norm_epsilon: `float`, small float added to variance to avoid dividing by + zero. + kernel_initializer: kernel_initializer for convolutional layers. + kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. + bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. + **kwargs: keyword arguments to be passed. + """ + + super().__init__(**kwargs) + self._fpn_depth = fpn_depth + + self._activation = activation + self._use_sync_bn = use_sync_bn + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + self._kernel_initializer = kernel_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._use_spatial_attention = use_spatial_attention + self._filter_scale = fpn_filter_scale + self._csp_stack = csp_stack + + self._base_config = dict( + activation=self._activation, + use_sync_bn=self._use_sync_bn, + kernel_regularizer=self._kernel_regularizer, + kernel_initializer=self._kernel_initializer, + bias_regularizer=self._bias_regularizer, + norm_epsilon=self._norm_epsilon, + norm_momentum=self._norm_momentum) + + def get_raw_depths(self, minimum_depth, inputs): + """Calculates the unscaled depths of the FPN branches. + + Args: + minimum_depth (int): depth of the smallest branch of the FPN. + inputs (dict): dictionary of the shape of input args as a dictionary of + lists. + + Returns: + The unscaled depths of the FPN branches. + """ + + depths = [] + for i in range(self._min_level, self._max_level + 1): + depths.append(inputs[str(i)][-1] / self._filter_scale) + return list(reversed(depths)) + + def build(self, inputs): + """Use config dictionary to generate all important attributes for head + construction. + + Args: + inputs: dictionary of the shape of input args as a dictionary of lists. 
+ """ + + keys = [int(key) for key in inputs.keys()] + self._min_level = min(keys) + self._max_level = max(keys) + self._min_depth = inputs[str(self._min_level)][-1] + self._depths = self.get_raw_depths(self._min_depth, inputs) + + # directly connect to an input path and process it + self.preprocessors = dict() + # resample an input and merge it with the output of another path + # inorder to aggregate backbone outputs + self.resamples = dict() + # set of convoltion layers and upsample layers that are used to + # prepare the FPN processors for output + + for level, depth in zip( + reversed(range(self._min_level, self._max_level + 1)), self._depths): + if level == self._min_level: + self.resamples[str(level)] = nn_blocks.PathAggregationBlock( + filters=depth // 2, + inverted=True, + upsample=True, + drop_final=self._csp_stack == 0, + upsample_size=2, + **self._base_config) + self.preprocessors[str(level)] = _IdentityRoute() + elif level != self._max_level: + self.resamples[str(level)] = nn_blocks.PathAggregationBlock( + filters=depth // 2, + inverted=True, + upsample=True, + drop_final=False, + upsample_size=2, + **self._base_config) + self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess( + filters=depth, + repetitions=self._fpn_depth - int(level == self._min_level), + block_invert=True, + insert_spp=False, + csp_stack=self._csp_stack, + **self._base_config) + else: + self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess( + filters=depth, + repetitions=self._fpn_depth + 1 * int(self._csp_stack == 0), + insert_spp=True, + block_invert=False, + csp_stack=self._csp_stack, + **self._base_config) + + def call(self, inputs): + outputs = dict() + layer_in = inputs[str(self._max_level)] + for level in reversed(range(self._min_level, self._max_level + 1)): + _, x = self.preprocessors[str(level)](layer_in) + outputs[str(level)] = x + if level > self._min_level: + x_next = inputs[str(level - 1)] + _, layer_in = self.resamples[str(level - 1)]([x_next, x]) + return outputs + + +@tf.keras.utils.register_keras_serializable(package='yolo') +class YoloPAN(tf.keras.layers.Layer): + """YOLO Path Aggregation Network""" + + def __init__(self, + path_process_len=6, + max_level_process_len=None, + embed_spp=False, + use_spatial_attention=False, + csp_stack=False, + activation='leaky', + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_initializer='glorot_uniform', + kernel_regularizer=None, + bias_regularizer=None, + fpn_input=True, + fpn_filter_scale=1.0, + **kwargs): + """Yolo Path Aggregation Network initialization function (Yolo V3 and V4). + + Args: + path_process_len: `int`, number of layers ot use in each Decoder path. + max_level_process_len: `int`, number of layers ot use in the largest + processing path, or the backbones largest output if it is different. + embed_spp: `bool`, use the SPP found in the YoloV3 and V4 model. + use_spatial_attention: `bool`, use the spatial attention module. + csp_stack: `bool`, CSPize the FPN. + activation: `str`, the activation function to use typically leaky or mish. + use_sync_bn: if True, use synchronized batch normalization. + norm_momentum: `float`, normalization omentum for the moving average. + norm_epsilon: `float`, small float added to variance to avoid dividing + by zero. + kernel_initializer: kernel_initializer for convolutional layers. + kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. + bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d. 
+ fpn_input: `bool`, for whether the input into this fucntion is an FPN or + a backbone. + fpn_filter_scale: `int`, scaling factor for the FPN filters. + **kwargs: keyword arguments to be passed. + """ + + super().__init__(**kwargs) + + self._path_process_len = path_process_len + self._embed_spp = embed_spp + self._use_spatial_attention = use_spatial_attention + + self._activation = activation + self._use_sync_bn = use_sync_bn + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + self._kernel_initializer = kernel_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._fpn_input = fpn_input + self._max_level_process_len = max_level_process_len + self._csp_stack = csp_stack + self._fpn_filter_scale = fpn_filter_scale + + if max_level_process_len is None: + self._max_level_process_len = path_process_len + + self._base_config = dict( + activation=self._activation, + use_sync_bn=self._use_sync_bn, + kernel_regularizer=self._kernel_regularizer, + kernel_initializer=self._kernel_initializer, + bias_regularizer=self._bias_regularizer, + norm_epsilon=self._norm_epsilon, + norm_momentum=self._norm_momentum) + + def build(self, inputs): + """Use config dictionary to generate all important attributes for head + construction. + + Args: + inputs: dictionary of the shape of input args as a dictionary of lists. + """ + + # define the key order + keys = [int(key) for key in inputs.keys()] + self._min_level = min(keys) + self._max_level = max(keys) + self._min_depth = inputs[str(self._min_level)][-1] + self._depths = self.get_raw_depths(self._min_depth, inputs) + + # directly connect to an input path and process it + self.preprocessors = dict() + # resample an input and merge it with the output of another path + # inorder to aggregate backbone outputs + self.resamples = dict() + + # FPN will reverse the key process order for the backbone, so we need + # adjust the order that objects are created and processed to adjust for + # this. not using an FPN will directly connect the decoder to the backbone + # therefore the object creation order needs to be done from the largest + # to smallest level. + if self._fpn_input: + # process order {... 
3, 4, 5} + self._iterator = range(self._min_level, self._max_level + 1) + self._check = lambda x: x < self._max_level + self._key_shift = lambda x: x + 1 + self._input = self._min_level + downsample = True + upsample = False + else: + # process order {5, 4, 3, ...} + self._iterator = list( + reversed(range(self._min_level, self._max_level + 1))) + self._check = lambda x: x > self._min_level + self._key_shift = lambda x: x - 1 + self._input = self._max_level + downsample = False + upsample = True + + if self._csp_stack == 0: + proc_filters = lambda x: x + resample_filters = lambda x: x // 2 + else: + proc_filters = lambda x: x * 2 + resample_filters = lambda x: x + for level, depth in zip(self._iterator, self._depths): + if level == self._input: + self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess( + filters=proc_filters(depth), + repetitions=self._max_level_process_len, + insert_spp=self._embed_spp, + block_invert=False, + insert_sam=self._use_spatial_attention, + csp_stack=self._csp_stack, + **self._base_config) + else: + self.resamples[str(level)] = nn_blocks.PathAggregationBlock( + filters=resample_filters(depth), + upsample=upsample, + downsample=downsample, + inverted=False, + drop_final=self._csp_stack == 0, + **self._base_config) + self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess( + filters=proc_filters(depth), + repetitions=self._path_process_len, + insert_spp=False, + insert_sam=self._use_spatial_attention, + csp_stack=self._csp_stack, + **self._base_config) + + def get_raw_depths(self, minimum_depth, inputs): + """Calculates the unscaled depths of the FPN branches. + + Args: + minimum_depth: `int` depth of the smallest branch of the FPN. + inputs: `dict[str, tf.InputSpec]` of the shape of input args as a dictionary of + lists. + + Returns: + The unscaled depths of the FPN branches. + """ + + depths = [] + if len(inputs.keys()) > 3 or self._fpn_filter_scale > 1: + for i in range(self._min_level, self._max_level + 1): + depths.append(inputs[str(i)][-1] * 2) + else: + for _ in range(self._min_level, self._max_level + 1): + depths.append(minimum_depth) + minimum_depth *= 2 + if self._fpn_input: + return depths + return list(reversed(depths)) + + def call(self, inputs): + outputs = dict() + layer_in = inputs[str(self._input)] + + for level in self._iterator: + x_route, x = self.preprocessors[str(level)](layer_in) + outputs[str(level)] = x + if self._check(level): + x_next = inputs[str(self._key_shift(level))] + _, layer_in = self.resamples[str( + self._key_shift(level))]([x_route, x_next]) + return outputs + + +@tf.keras.utils.register_keras_serializable(package='yolo') +class YoloDecoder(tf.keras.Model): + """Darknet Backbone Decoder""" + + def __init__(self, + input_specs, + use_fpn=False, + use_spatial_attention=False, + csp_stack=False, + fpn_depth=4, + fpn_filter_scale=1, + path_process_len=6, + max_level_process_len=None, + embed_spp=False, + activation='leaky', + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_initializer='glorot_uniform', + kernel_regularizer=None, + bias_regularizer=None, + **kwargs): + """Yolo Decoder initialization function. A unified model that ties all decoder + components into a conditionally build YOLO decder. + + Args: + input_specs: `dict[str, tf.InputSpec]`: input specs of each of the inputs + to the heads. + use_fpn: `bool`, use the FPN found in the YoloV4 model. + use_spatial_attention: `bool`, use the spatial attention module. + csp_stack: `bool`, CSPize the FPN. 
+ fpn_depth: `int`, number of layers ot use in each FPN path + if you choose to use an FPN. + fpn_filter_scale: `int`, scaling factor for the FPN filters. + path_process_len: `int`, number of layers ot use in each Decoder path. + max_level_process_len: `int`, number of layers ot use in the largest + processing path, or the backbones largest output if it is different. + embed_spp: `bool`, use the SPP found in the YoloV3 and V4 model. + activation: `str`, the activation function to use typically leaky or mish. + use_sync_bn: if True, use synchronized batch normalization. + norm_momentum: `float`, normalization omentum for the moving average. + norm_epsilon: `float`, small float added to variance to avoid dividing by + zero. + kernel_initializer: kernel_initializer for convolutional layers. + kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. + bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d. + **kwargs: keyword arguments to be passed. + """ + + self._input_specs = input_specs + self._use_fpn = use_fpn + self._fpn_depth = fpn_depth + self._path_process_len = path_process_len + self._max_level_process_len = max_level_process_len + self._embed_spp = embed_spp + + self._activation = activation + self._use_sync_bn = use_sync_bn + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + self._kernel_initializer = kernel_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + + self._base_config = dict( + use_spatial_attention=use_spatial_attention, + csp_stack=csp_stack, + activation=self._activation, + use_sync_bn=self._use_sync_bn, + fpn_filter_scale=fpn_filter_scale, + norm_momentum=self._norm_momentum, + norm_epsilon=self._norm_epsilon, + kernel_initializer=self._kernel_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer) + + self._decoder_config = dict( + path_process_len=self._path_process_len, + max_level_process_len=self._max_level_process_len, + embed_spp=self._embed_spp, + fpn_input=self._use_fpn, + **self._base_config) + + inputs = { + key: tf.keras.layers.Input(shape=value[1:]) + for key, value in input_specs.items() + } + if self._use_fpn: + inter_outs = YoloFPN( + fpn_depth=self._fpn_depth, **self._base_config)( + inputs) + outputs = YoloPAN(**self._decoder_config)(inter_outs) + else: + inter_outs = None + outputs = YoloPAN(**self._decoder_config)(inputs) + + self._output_specs = {key: value.shape for key, value in outputs.items()} + super().__init__(inputs=inputs, outputs=outputs, name='YoloDecoder') + + @property + def use_fpn(self): + return self._use_fpn + + @property + def output_specs(self): + return self._output_specs + + def get_config(self): + config = dict( + input_specs=self._input_specs, + use_fpn=self._use_fpn, + fpn_depth=self._fpn_depth, + **self._decoder_config) + return config + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) diff --git a/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder_test.py b/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder_test.py new file mode 100644 index 00000000000..236396ba79d --- /dev/null +++ b/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder_test.py @@ -0,0 +1,154 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Tests for YOLO.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.vision.beta.projects.yolo.modeling.decoders import yolo_decoder as decoders + + +class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase): + + @parameterized.parameters('1', '6spp', '6sppfpn', '6') + def test_network_creation(self, version): + """Test creation of ResNet family models.""" + tf.keras.backend.set_image_data_format('channels_last') + input_shape = { + '3': [1, 52, 52, 256], + '4': [1, 26, 26, 512], + '5': [1, 13, 13, 1024] + } + decoder = build_yolo_decoder(input_shape, version) + + inputs = {} + for key in input_shape.keys(): + inputs[key] = tf.ones(input_shape[key], dtype=tf.float32) + + endpoints = decoder.call(inputs) + + for key in endpoints.keys(): + self.assertAllEqual(endpoints[key].shape.as_list(), input_shape[key]) + + @combinations.generate( + combinations.combine( + strategy=[ + strategy_combinations.tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + use_sync_bn=[False, True], + )) + def test_sync_bn_multiple_devices(self, strategy, use_sync_bn): + """Test for sync bn on TPU and GPU devices.""" + + tf.keras.backend.set_image_data_format('channels_last') + + with strategy.scope(): + input_shape = { + '3': [1, 52, 52, 256], + '4': [1, 26, 26, 512], + '5': [1, 13, 13, 1024] + } + decoder = build_yolo_decoder(input_shape, '6') + + inputs = {} + for key in input_shape.keys(): + inputs[key] = tf.ones(input_shape[key], dtype=tf.float32) + + _ = decoder.call(inputs) + + @parameterized.parameters(1, 3, 4) + def test_input_specs(self, input_dim): + """Test different input feature dimensions.""" + tf.keras.backend.set_image_data_format('channels_last') + + input_shape = { + '3': [1, 52, 52, 256], + '4': [1, 26, 26, 512], + '5': [1, 13, 13, 1024] + } + decoder = build_yolo_decoder(input_shape, '6') + + inputs = {} + for key in input_shape.keys(): + inputs[key] = tf.ones(input_shape[key], dtype=tf.float32) + _ = decoder(inputs) + + def test_serialize_deserialize(self): + """Create a network object that sets all of its config options.""" + tf.keras.backend.set_image_data_format('channels_last') + + input_shape = { + '3': [1, 52, 52, 256], + '4': [1, 26, 26, 512], + '5': [1, 13, 13, 1024] + } + decoder = build_yolo_decoder(input_shape, '6') + + inputs = {} + for key in input_shape.keys(): + inputs[key] = tf.ones(input_shape[key], dtype=tf.float32) + + _ = decoder(inputs) + config = decoder.get_config() + decoder_from_config = decoders.YoloDecoder.from_config(config) + self.assertAllEqual(decoder.get_config(), decoder_from_config.get_config()) + + +def build_yolo_decoder(input_specs, type='1'): + if type == '1': + model = decoders.YoloDecoder( + input_specs=input_specs, + embed_spp=False, + use_fpn=False, + max_level_process_len=2, + path_process_len=1, + activation='mish') + elif type == '6spp': + model = decoders.YoloDecoder( + input_specs=input_specs, + 
embed_spp=True, + use_fpn=False, + max_level_process_len=None, + path_process_len=6, + activation='mish') + elif type == '6sppfpn': + model = decoders.YoloDecoder( + input_specs=input_specs, + embed_spp=True, + use_fpn=True, + max_level_process_len=None, + path_process_len=6, + activation='mish') + elif type == '6': + model = decoders.YoloDecoder( + input_specs=input_specs, + embed_spp=False, + use_fpn=False, + max_level_process_len=None, + path_process_len=6, + activation='mish') + else: + raise NotImplementedError(f"YOLO decoder test {type} not implemented.") + return model + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/yolo/modeling/heads/__init__.py b/official/vision/beta/projects/yolo/modeling/heads/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/official/vision/beta/projects/yolo/modeling/heads/yolo_head.py b/official/vision/beta/projects/yolo/modeling/heads/yolo_head.py new file mode 100644 index 00000000000..4d7d082a00c --- /dev/null +++ b/official/vision/beta/projects/yolo/modeling/heads/yolo_head.py @@ -0,0 +1,120 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +import tensorflow as tf +from official.vision.beta.projects.yolo.modeling.layers import nn_blocks + + +class YoloHead(tf.keras.layers.Layer): + """YOLO Prediction Head""" + + def __init__(self, + min_level, + max_level, + classes=80, + boxes_per_level=3, + output_extras=0, + norm_momentum=0.99, + norm_epsilon=0.001, + kernel_initializer='glorot_uniform', + kernel_regularizer=None, + bias_regularizer=None, + activation=None, + **kwargs): + """Yolo Prediction Head initialization function. + + Args: + min_level: `int`, the minimum backbone output level. + max_level: `int`, the maximum backbone output level. + classes: `int`, number of classes per category. + boxes_per_level: `int`, number of boxes to predict per level. + output_extras: `int`, number of additional output channels that the head. + should predict for non-object detection and non-image classification + tasks. + norm_momentum: `float`, normalization momentum for the moving average. + norm_epsilon: `float`, small float added to variance to avoid dividing by + zero. + kernel_initializer: kernel_initializer for convolutional layers. + kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. + bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d. + activation: `str`, the activation function to use typically leaky or mish. + **kwargs: keyword arguments to be passed. 
+ """ + + super().__init__(**kwargs) + self._min_level = min_level + self._max_level = max_level + + self._key_list = [ + str(key) for key in range(self._min_level, self._max_level + 1) + ] + + self._classes = classes + self._boxes_per_level = boxes_per_level + self._output_extras = output_extras + + self._output_conv = (classes + output_extras + 5) * boxes_per_level + + self._base_config = dict( + activation=activation, + norm_momentum=norm_momentum, + norm_epsilon=norm_epsilon, + kernel_initializer=kernel_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer) + + self._conv_config = dict( + filters=self._output_conv, + kernel_size=(1, 1), + strides=(1, 1), + padding='same', + use_bn=False, + **self._base_config) + + def build(self, input_shape): + self._head = dict() + for key in self._key_list: + self._head[key] = nn_blocks.ConvBN(**self._conv_config) + + def call(self, inputs): + outputs = dict() + for key in self._key_list: + outputs[key] = self._head[key](inputs[key]) + return outputs + + @property + def output_depth(self): + return (self._classes + self._output_extras + 5) * self._boxes_per_level + + @property + def num_boxes(self): + if self._min_level is None or self._max_level is None: + raise Exception( + 'Model has to be built before number of boxes can be determined.') + return (self._max_level - self._min_level + 1) * self._boxes_per_level + + def get_config(self): + config = dict( + min_level=self._min_level, + max_level=self._max_level, + classes=self._classes, + boxes_per_level=self._boxes_per_level, + output_extras=self._output_extras, + **self._base_config) + return config + + @classmethod + def from_config(cls, config, custom_objects=None): + return cls(**config) diff --git a/official/vision/beta/projects/yolo/modeling/heads/yolo_head_test.py b/official/vision/beta/projects/yolo/modeling/heads/yolo_head_test.py new file mode 100644 index 00000000000..422f1a9a8e2 --- /dev/null +++ b/official/vision/beta/projects/yolo/modeling/heads/yolo_head_test.py @@ -0,0 +1,77 @@ +# Copyright 2021 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Lint as: python3 +"""Tests for yolo heads.""" + +# Import libraries +from absl.testing import parameterized +import numpy as np +import tensorflow as tf + +from tensorflow.python.distribute import combinations +from tensorflow.python.distribute import strategy_combinations +from official.vision.beta.projects.yolo.modeling.heads import yolo_head as heads + + +class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase): + + def test_network_creation(self): + """Test creation of YOLO family models.""" + tf.keras.backend.set_image_data_format('channels_last') + input_shape = { + '3': [1, 52, 52, 256], + '4': [1, 26, 26, 512], + '5': [1, 13, 13, 1024] + } + classes = 100 + bps = 3 + head = heads.YoloHead(3, 5, classes=classes, boxes_per_level=bps) + + inputs = {} + for key in input_shape.keys(): + inputs[key] = tf.ones(input_shape[key], dtype=tf.float32) + + endpoints = head(inputs) + # print(endpoints) + + for key in endpoints.keys(): + expected_input_shape = input_shape[key] + expected_input_shape[-1] = (classes + 5) * bps + self.assertAllEqual(endpoints[key].shape.as_list(), expected_input_shape) + + def test_serialize_deserialize(self): + # Create a network object that sets all of its config options. + tf.keras.backend.set_image_data_format('channels_last') + input_shape = { + '3': [1, 52, 52, 256], + '4': [1, 26, 26, 512], + '5': [1, 13, 13, 1024] + } + classes = 100 + bps = 3 + head = heads.YoloHead(3, 5, classes=classes, boxes_per_level=bps) + + inputs = {} + for key in input_shape.keys(): + inputs[key] = tf.ones(input_shape[key], dtype=tf.float32) + + _ = head(inputs) + configs = head.get_config() + head_from_config = heads.YoloHead.from_config(configs) + self.assertAllEqual(head.get_config(), head_from_config.get_config()) + + +if __name__ == '__main__': + tf.test.main() diff --git a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py index 8bc6a78078a..9897def3ad3 100644 --- a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py +++ b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py @@ -15,62 +15,64 @@ # Lint as: python3 """Contains common building blocks for yolo neural networks.""" - from typing import Callable, List import tensorflow as tf from official.modeling import tf_utils +from official.vision.beta.ops import spatial_transform_ops -@tf.keras.utils.register_keras_serializable(package="yolo") +@tf.keras.utils.register_keras_serializable(package='yolo') class Identity(tf.keras.layers.Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + def call(self, inputs): return inputs -@tf.keras.utils.register_keras_serializable(package="yolo") +@tf.keras.utils.register_keras_serializable(package='yolo') class ConvBN(tf.keras.layers.Layer): - """Modified Convolution layer to match that of the DarkNet Library. - + """ + Modified Convolution layer to match that of the Darknet Library. The Layer is a standards combination of Conv BatchNorm Activation, - however, the use of bias in the conv is determined by the use of batch norm. - + however, the use of bias in the conv is determined by the use of batch + normalization. Cross Stage Partial networks (CSPNets) were proposed in: - [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, Ping-Yang - Chen, Jun-Wei Hsieh. - CSPNet: A New Backbone that can Enhance Learning Capability of CNN. 
- arXiv:1911.11929 + [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, + Ping-Yang Chen, Jun-Wei Hsieh + CSPNet: A New Backbone that can Enhance Learning Capability of CNN. + arXiv:1911.11929 """ def __init__(self, filters=1, kernel_size=(1, 1), strides=(1, 1), - padding="same", + padding='same', dilation_rate=(1, 1), - kernel_initializer="glorot_uniform", - bias_initializer="zeros", - kernel_regularizer=None, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', bias_regularizer=None, + kernel_regularizer=None, use_bn=True, use_sync_bn=False, norm_momentum=0.99, norm_epsilon=0.001, - activation="leaky", + activation='leaky', leaky_alpha=0.1, **kwargs): - """Initializes ConvBN layer. - + """ Args: - filters: integer for output depth, or the number of features to learn + filters: integer for output depth, or the number of features to learn. kernel_size: integer or tuple for the shape of the weight matrix or kernel to learn. strides: integer of tuple how much to move the kernel after each kernel - use padding: string 'valid' or 'same', if same, then pad the image, else - do not. - padding: `str`, padding method for conv layers. - dilation_rate: tuple to indicate how much to modulate kernel weights and - how many pixels in a feature map to skip. + use. + padding: string 'valid' or 'same', if same, then pad the image, else do + not. + dialtion_rate: tuple to indicate how much to modulate kernel weights and + how many pixels in a feature map to skip. kernel_initializer: string to indicate which function to use to initialize weights. bias_initializer: string to indicate which function to use to initialize @@ -80,14 +82,17 @@ def __init__(self, bias_regularizer: string to indicate which function to use to regularizer bias. use_bn: boolean for whether to use batch normalization. - use_sync_bn: boolean for whether sync batch normalization. - norm_momentum: float for moment to use for batch normalization - norm_epsilon: float for batch normalization epsilon + use_sync_bn: boolean for whether sync batch normalization statistics + of all batch norm layers to the models global statistics + (across all input batches). + norm_momentum: float for moment to use for batch normalization. + norm_epsilon: float for batch normalization epsilon. activation: string or None for activation function to use in layer, - if None activation is replaced by linear. + if None activation is replaced by linear. leaky_alpha: float to use as alpha if activation function is leaky. - **kwargs: Keyword Arguments + **kwargs: Keyword Arguments. 
""" + # convolution params self._filters = filters self._kernel_size = kernel_size @@ -97,15 +102,16 @@ def __init__(self, self._kernel_initializer = kernel_initializer self._bias_initializer = bias_initializer self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer # batch normalization params self._use_bn = use_bn self._use_sync_bn = use_sync_bn - self._norm_moment = norm_momentum + self._norm_momentum = norm_momentum self._norm_epsilon = norm_epsilon - if tf.keras.backend.image_data_format() == "channels_last": + if tf.keras.backend.image_data_format() == 'channels_last': # format: (batch_size, height, width, channels) self._bn_axis = -1 else: @@ -116,7 +122,7 @@ def __init__(self, self._activation = activation self._leaky_alpha = leaky_alpha - super(ConvBN, self).__init__(**kwargs) + super().__init__(**kwargs) def build(self, input_shape): use_bias = not self._use_bn @@ -136,101 +142,101 @@ def build(self, input_shape): if self._use_bn: if self._use_sync_bn: self.bn = tf.keras.layers.experimental.SyncBatchNormalization( - momentum=self._norm_moment, + momentum=self._norm_momentum, epsilon=self._norm_epsilon, axis=self._bn_axis) else: self.bn = tf.keras.layers.BatchNormalization( - momentum=self._norm_moment, + momentum=self._norm_momentum, epsilon=self._norm_epsilon, axis=self._bn_axis) - else: - self.bn = Identity() - if self._activation == "leaky": + if self._activation == 'leaky': self._activation_fn = tf.keras.layers.LeakyReLU(alpha=self._leaky_alpha) - elif self._activation == "mish": + elif self._activation == 'mish': self._activation_fn = lambda x: x * tf.math.tanh(tf.math.softplus(x)) else: self._activation_fn = tf_utils.get_activation(self._activation) def call(self, x): x = self.conv(x) - x = self.bn(x) + if self._use_bn: + x = self.bn(x) x = self._activation_fn(x) return x def get_config(self): # used to store/share parameters to reconstruct the model layer_config = { - "filters": self._filters, - "kernel_size": self._kernel_size, - "strides": self._strides, - "padding": self._padding, - "dilation_rate": self._dilation_rate, - "kernel_initializer": self._kernel_initializer, - "bias_initializer": self._bias_initializer, - "bias_regularizer": self._bias_regularizer, - "kernel_regularizer": self._kernel_regularizer, - "use_bn": self._use_bn, - "use_sync_bn": self._use_sync_bn, - "norm_moment": self._norm_moment, - "norm_epsilon": self._norm_epsilon, - "activation": self._activation, - "leaky_alpha": self._leaky_alpha + 'filters': self._filters, + 'kernel_size': self._kernel_size, + 'strides': self._strides, + 'padding': self._padding, + 'dilation_rate': self._dilation_rate, + 'kernel_initializer': self._kernel_initializer, + 'bias_initializer': self._bias_initializer, + 'bias_regularizer': self._bias_regularizer, + 'kernel_regularizer': self._kernel_regularizer, + 'use_bn': self._use_bn, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'activation': self._activation, + 'leaky_alpha': self._leaky_alpha } - layer_config.update(super(ConvBN, self).get_config()) + layer_config.update(super().get_config()) return layer_config - def __repr__(self): - return repr(self.get_config()) - -@tf.keras.utils.register_keras_serializable(package="yolo") +@tf.keras.utils.register_keras_serializable(package='yolo') class DarkResidual(tf.keras.layers.Layer): - """DarkNet block with Residual connection for Yolo v3 Backbone. 
+ """ + Darknet block with Residual connection for Yolo v3 Backbone """ def __init__(self, filters=1, filter_scale=2, - kernel_initializer="glorot_uniform", - bias_initializer="zeros", + dilation_rate=1, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, use_bn=True, use_sync_bn=False, norm_momentum=0.99, norm_epsilon=0.001, - activation="leaky", + activation='leaky', leaky_alpha=0.1, - sc_activation="linear", + sc_activation='linear', downsample=False, **kwargs): - """Initializes DarkResidual. - + """ Args: filters: integer for output depth, or the number of features to learn. - filter_scale: `int`, scale factor for number of filters. kernel_initializer: string to indicate which function to use to initialize - weights + weights. bias_initializer: string to indicate which function to use to initialize - bias + bias. kernel_regularizer: string to indicate which function to use to - regularizer weights + regularizer weights. bias_regularizer: string to indicate which function to use to regularizer - bias - use_bn: boolean for whether to use batch normalization - use_sync_bn: boolean for whether sync batch normalization. - norm_momentum: float for moment to use for batch normalization - norm_epsilon: float for batch normalization epsilon - activation: string for activation function to use in conv layers. - leaky_alpha: float to use as alpha if activation function is leaky - sc_activation: string for activation function to use in layer + bias. + use_bn: boolean for whether to use batch normalization. + use_sync_bn: boolean for whether sync batch normalization statistics. + of all batch norm layers to the models global statistics + (across all input batches). + norm_momentum: float for moment to use for batch normalization. + norm_epsilon: float for batch normalization epsilon. + conv_activation: string or None for activation function to use in layer, + if None activation is replaced by linear. + leaky_alpha: float to use as alpha if activation function is leaky. + sc_activation: string for activation function to use in layer. downsample: boolean for if image input is larger than layer output, set - downsample to True so the dimensions are forced to match - **kwargs: Keyword Arguments + downsample to True so the dimensions are forced to match. + **kwargs: Keyword Arguments. 
""" + # downsample self._downsample = downsample @@ -245,8 +251,10 @@ def __init__(self, self._kernel_regularizer = kernel_regularizer # normal params - self._norm_moment = norm_momentum + self._norm_momentum = norm_momentum self._norm_epsilon = norm_epsilon + self._dilation_rate = dilation_rate if isinstance(dilation_rate, + int) else dilation_rate[0] # activation params self._conv_activation = activation @@ -256,138 +264,150 @@ def __init__(self, super().__init__(**kwargs) def build(self, input_shape): - self._dark_conv_args = { - "kernel_initializer": self._kernel_initializer, - "bias_initializer": self._bias_initializer, - "bias_regularizer": self._bias_regularizer, - "use_bn": self._use_bn, - "use_sync_bn": self._use_sync_bn, - "norm_momentum": self._norm_moment, - "norm_epsilon": self._norm_epsilon, - "activation": self._conv_activation, - "kernel_regularizer": self._kernel_regularizer, - "leaky_alpha": self._leaky_alpha + dark_conv_args = { + 'kernel_initializer': self._kernel_initializer, + 'bias_initializer': self._bias_initializer, + 'bias_regularizer': self._bias_regularizer, + 'use_bn': self._use_bn, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'activation': self._conv_activation, + 'kernel_regularizer': self._kernel_regularizer, + 'leaky_alpha': self._leaky_alpha } if self._downsample: + if self._dilation_rate > 1: + dilation_rate = 1 + if self._dilation_rate // 2 > 0: + dilation_rate = self._dilation_rate // 2 + down_stride = 1 + else: + dilation_rate = 1 + down_stride = 2 + self._dconv = ConvBN( filters=self._filters, kernel_size=(3, 3), - strides=(2, 2), - padding="same", - **self._dark_conv_args) - else: - self._dconv = Identity() + strides=down_stride, + dilation_rate=dilation_rate, + padding='same', + **dark_conv_args) self._conv1 = ConvBN( filters=self._filters // self._filter_scale, kernel_size=(1, 1), strides=(1, 1), - padding="same", - **self._dark_conv_args) + padding='same', + **dark_conv_args) self._conv2 = ConvBN( filters=self._filters, kernel_size=(3, 3), strides=(1, 1), - padding="same", - **self._dark_conv_args) + dilation_rate=self._dilation_rate, + padding='same', + **dark_conv_args) self._shortcut = tf.keras.layers.Add() - if self._sc_activation == "leaky": - self._activation_fn = tf.keras.layers.LeakyReLU( - alpha=self._leaky_alpha) - elif self._sc_activation == "mish": + if self._sc_activation == 'leaky': + self._activation_fn = tf.keras.layers.LeakyReLU(alpha=self._leaky_alpha) + elif self._sc_activation == 'mish': self._activation_fn = lambda x: x * tf.math.tanh(tf.math.softplus(x)) else: - self._activation_fn = tf_utils.get_activation(self._sc_activation) + self._activation_fn = tf_utils.get_activation( + self._sc_activation + ) super().build(input_shape) - def call(self, inputs): - shortcut = self._dconv(inputs) - x = self._conv1(shortcut) + def call(self, inputs, training=None): + if self._downsample: + inputs = self._dconv(inputs) + x = self._conv1(inputs) x = self._conv2(x) - x = self._shortcut([x, shortcut]) + x = self._shortcut([x, inputs]) return self._activation_fn(x) def get_config(self): # used to store/share parameters to reconstruct the model layer_config = { - "filters": self._filters, - "kernel_initializer": self._kernel_initializer, - "bias_initializer": self._bias_initializer, - "kernel_regularizer": self._kernel_regularizer, - "use_bn": self._use_bn, - "use_sync_bn": self._use_sync_bn, - "norm_moment": self._norm_moment, - "norm_epsilon": self._norm_epsilon, - 
"activation": self._conv_activation, - "leaky_alpha": self._leaky_alpha, - "sc_activation": self._sc_activation, - "downsample": self._downsample + 'filters': self._filters, + 'kernel_initializer': self._kernel_initializer, + 'bias_initializer': self._bias_initializer, + 'kernel_regularizer': self._kernel_regularizer, + 'dilation_rate': self._dilation_rate, + 'use_bn': self._use_bn, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'activation': self._conv_activation, + 'leaky_alpha': self._leaky_alpha, + 'sc_activation': self._sc_activation, + 'downsample': self._downsample, } layer_config.update(super().get_config()) return layer_config -@tf.keras.utils.register_keras_serializable(package="yolo") +@tf.keras.utils.register_keras_serializable(package='yolo') class CSPTiny(tf.keras.layers.Layer): - """A Small size convolution block proposed in the CSPNet. - - The layer uses shortcuts, routing(concatnation), and feature grouping - in order to improve gradient variablity and allow for high efficency, low - power residual learning for small networtf.keras. - + """ + A Small size convolution block proposed in the CSPNet. The layer uses + shortcuts, routing(concatnation), and feature grouping in order to improve + gradient variablity and allow for high efficency, low power residual learning + for small networtf.keras. Cross Stage Partial networks (CSPNets) were proposed in: - [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, Ping-Yang - Chen, Jun-Wei Hsieh + [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, + Ping-Yang Chen, Jun-Wei Hsieh CSPNet: A New Backbone that can Enhance Learning Capability of CNN. - arXiv:1911.11929 + arXiv:1911.11929 """ def __init__(self, filters=1, - kernel_initializer="glorot_uniform", - bias_initializer="zeros", - kernel_regularizer=None, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', bias_regularizer=None, + kernel_regularizer=None, use_bn=True, + dilation_rate=1, use_sync_bn=False, group_id=1, groups=2, norm_momentum=0.99, norm_epsilon=0.001, - activation="leaky", + activation='leaky', downsample=True, leaky_alpha=0.1, **kwargs): - """Initializes CSPTiny. - + """ Args: - filters: integer for output depth, or the number of features to learn + filters: integer for output depth, or the number of features to learn. kernel_initializer: string to indicate which function to use to initialize - weights + weights. bias_initializer: string to indicate which function to use to initialize - bias + bias. + use_bn: boolean for whether to use batch normalization. kernel_regularizer: string to indicate which function to use to - regularizer weights + regularizer weights. bias_regularizer: string to indicate which function to use to regularizer - bias - use_bn: boolean for whether to use batch normalization - use_sync_bn: boolean for whether sync batch normalization statistics of - all batch norm layers to the models global statistics (across all input - batches) - group_id: integer for which group of features to pass through the csp tiny - stack. + bias. + use_sync_bn: boolean for whether sync batch normalization statistics + of all batch norm layers to the models global statistics + (across all input batches). + group_id: integer for which group of features to pass through the csp + tiny stack. 
groups: integer for how many splits there should be in the convolution - feature stack output - norm_momentum: float for moment to use for batch normalization - norm_epsilon: float for batch normalization epsilon - activation: string or None for activation function to use in layer, - if None activation is replaced by linear + feature stack output. + norm_momentum: float for moment to use for batch normalization. + norm_epsilon: float for batch normalization epsilon. + conv_activation: string or None for activation function to use in layer, + if None activation is replaced by linear. + leaky_alpha: float to use as alpha if activation function is leaky. + sc_activation: string for activation function to use in layer. downsample: boolean for if image input is larger than layer output, set - downsample to True so the dimensions are forced to match - leaky_alpha: float to use as alpha if activation function is leaky - **kwargs: Keyword Arguments + downsample to True so the dimensions are forced to match. + **kwargs: Keyword Arguments. """ # ConvBN params @@ -396,6 +416,7 @@ def __init__(self, self._bias_initializer = bias_initializer self._bias_regularizer = bias_regularizer self._use_bn = use_bn + self._dilation_rate = dilation_rate self._use_sync_bn = use_sync_bn self._kernel_regularizer = kernel_regularizer self._groups = groups @@ -403,7 +424,7 @@ def __init__(self, self._downsample = downsample # normal params - self._norm_moment = norm_momentum + self._norm_momentum = norm_momentum self._norm_epsilon = norm_epsilon # activation params @@ -413,37 +434,37 @@ def __init__(self, super().__init__(**kwargs) def build(self, input_shape): - self._dark_conv_args = { - "kernel_initializer": self._kernel_initializer, - "bias_initializer": self._bias_initializer, - "bias_regularizer": self._bias_regularizer, - "use_bn": self._use_bn, - "use_sync_bn": self._use_sync_bn, - "norm_momentum": self._norm_moment, - "norm_epsilon": self._norm_epsilon, - "activation": self._conv_activation, - "kernel_regularizer": self._kernel_regularizer, - "leaky_alpha": self._leaky_alpha + dark_conv_args = { + 'kernel_initializer': self._kernel_initializer, + 'bias_initializer': self._bias_initializer, + 'bias_regularizer': self._bias_regularizer, + 'use_bn': self._use_bn, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'activation': self._conv_activation, + 'kernel_regularizer': self._kernel_regularizer, + 'leaky_alpha': self._leaky_alpha } self._convlayer1 = ConvBN( filters=self._filters, kernel_size=(3, 3), strides=(1, 1), - padding="same", - **self._dark_conv_args) + padding='same', + **dark_conv_args) self._convlayer2 = ConvBN( filters=self._filters // 2, kernel_size=(3, 3), strides=(1, 1), - padding="same", + padding='same', kernel_initializer=self._kernel_initializer, bias_initializer=self._bias_initializer, bias_regularizer=self._bias_regularizer, kernel_regularizer=self._kernel_regularizer, use_bn=self._use_bn, use_sync_bn=self._use_sync_bn, - norm_momentum=self._norm_moment, + norm_momentum=self._norm_momentum, norm_epsilon=self._norm_epsilon, activation=self._conv_activation, leaky_alpha=self._leaky_alpha) @@ -452,22 +473,23 @@ def build(self, input_shape): filters=self._filters // 2, kernel_size=(3, 3), strides=(1, 1), - padding="same", - **self._dark_conv_args) + padding='same', + **dark_conv_args) self._convlayer4 = ConvBN( filters=self._filters, kernel_size=(1, 1), strides=(1, 1), - padding="same", - **self._dark_conv_args) + padding='same', 
+ **dark_conv_args) - self._maxpool = tf.keras.layers.MaxPool2D( - pool_size=2, strides=2, padding="same", data_format=None) + if self._downsample: + self._maxpool = tf.keras.layers.MaxPool2D( + pool_size=2, strides=2, padding='same', data_format=None) super().build(input_shape) - def call(self, inputs): + def call(self, inputs, training=None): x1 = self._convlayer1(inputs) x1_group = tf.split(x1, self._groups, axis=-1)[self._group_id] x2 = self._convlayer2(x1_group) # grouping @@ -479,74 +501,55 @@ def call(self, inputs): x = self._maxpool(x) return x, x5 - def get_config(self): - # used to store/share parameters to reconsturct the model - layer_config = { - "filters": self._filters, - "strides": self._strides, - "kernel_initializer": self._kernel_initializer, - "bias_initializer": self._bias_initializer, - "kernel_regularizer": self._kernel_regularizer, - "use_bn": self._use_bn, - "use_sync_bn": self._use_sync_bn, - "norm_moment": self._norm_moment, - "norm_epsilon": self._norm_epsilon, - "activation": self._conv_activation, - "leaky_alpha": self._leaky_alpha, - "sc_activation": self._sc_activation, - } - layer_config.update(super().get_config()) - return layer_config - -@tf.keras.utils.register_keras_serializable(package="yolo") +@tf.keras.utils.register_keras_serializable(package='yolo') class CSPRoute(tf.keras.layers.Layer): - """Down sampling layer to take the place of down sampleing. - - It is applied in Residual networks. This is the first of 2 layers needed to - convert any Residual Network model to a CSPNet. At the start of a new level - change, this CSPRoute layer creates a learned identity that will act as a - cross stage connection, that is used to inform the inputs to the next stage. - It is called cross stage partial because the number of filters required in - every intermitent Residual layer is reduced by half. The sister layer will - take the partial generated by this layer and concatnate it with the output of - the final residual layer in the stack to create a fully feature level output. - This concatnation merges the partial blocks of 2 levels as input to the next - allowing the gradients of each level to be more unique, and reducing the - number of parameters required by each level by 50% while keeping accuracy - consistent. + """ + Down sampling layer to take the place of down sampleing done in Residual + networks. This is the first of 2 layers needed to convert any Residual Network + model to a CSPNet. At the start of a new level change, this CSPRoute layer + creates a learned identity that will act as a cross stage connection, + that is used to inform the inputs to the next stage. It is called cross stage + partial because the number of filters required in every intermitent Residual + layer is reduced by half. The sister layer will take the partial generated by + this layer and concatnate it with the output of the final residual layer in + the stack to create a fully feature level output. This concatnation merges the + partial blocks of 2 levels as input to the next allowing the gradients of each + level to be more unique, and reducing the number of parameters required by + each level by 50% while keeping accuracy consistent. Cross Stage Partial networks (CSPNets) were proposed in: - [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, Ping-Yang - Chen, Jun-Wei Hsieh. + [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, + Ping-Yang Chen, Jun-Wei Hsieh CSPNet: A New Backbone that can Enhance Learning Capability of CNN. 
- arXiv:1911.11929 + arXiv:1911.11929 """ def __init__(self, filters, filter_scale=2, - activation="mish", - downsample=True, - kernel_initializer="glorot_uniform", - bias_initializer="zeros", - kernel_regularizer=None, + activation='mish', + kernel_initializer='glorot_uniform', + bias_initializer='zeros', bias_regularizer=None, + kernel_regularizer=None, + dilation_rate=1, use_bn=True, use_sync_bn=False, norm_momentum=0.99, norm_epsilon=0.001, + downsample=True, + leaky_alpha=0.1, **kwargs): - """Initializes CSPRoute. - + """ Args: filters: integer for output depth, or the number of features to learn filter_scale: integer dicating (filters//2) or the number of filters in the partial feature stack. - activation: string for activation function to use in layer downsample: down_sample the input. - kernel_initializer: string to indicate which function to use to initialize - weights. + activation: string for activation function to use in layer. + kernel_initializer: string to indicate which function to use to + initialize weights. bias_initializer: string to indicate which function to use to initialize bias. kernel_regularizer: string to indicate which function to use to @@ -554,99 +557,117 @@ def __init__(self, bias_regularizer: string to indicate which function to use to regularizer bias. use_bn: boolean for whether to use batch normalization. - use_sync_bn: boolean for whether sync batch normalization. - norm_momentum: float for moment to use for batch normalization - norm_epsilon: float for batch normalization epsilon - **kwargs: Keyword Arguments + use_sync_bn: boolean for whether sync batch normalization statistics + of all batch norm layers to the models global statistics + (across all input batches). + norm_momentum: float for moment to use for batch normalization. + norm_epsilon: float for batch normalization epsilon. + **kwargs: Keyword Arguments. """ super().__init__(**kwargs) - # Layer params. + # layer params self._filters = filters self._filter_scale = filter_scale self._activation = activation - # Convoultion params. 
+ # convoultion params self._kernel_initializer = kernel_initializer self._bias_initializer = bias_initializer self._kernel_regularizer = kernel_regularizer self._bias_regularizer = bias_regularizer + self._dilation_rate = dilation_rate self._use_bn = use_bn self._use_sync_bn = use_sync_bn - self._norm_moment = norm_momentum + self._norm_momentum = norm_momentum self._norm_epsilon = norm_epsilon self._downsample = downsample + self._leaky_alpha = leaky_alpha def build(self, input_shape): - self._dark_conv_args = { - "kernel_initializer": self._kernel_initializer, - "bias_initializer": self._bias_initializer, - "bias_regularizer": self._bias_regularizer, - "use_bn": self._use_bn, - "use_sync_bn": self._use_sync_bn, - "norm_momentum": self._norm_moment, - "norm_epsilon": self._norm_epsilon, - "activation": self._activation, - "kernel_regularizer": self._kernel_regularizer, + dark_conv_args = { + 'kernel_initializer': self._kernel_initializer, + 'bias_initializer': self._bias_initializer, + 'bias_regularizer': self._bias_regularizer, + 'use_bn': self._use_bn, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'activation': self._activation, + 'kernel_regularizer': self._kernel_regularizer, + 'leaky_alpha': self._leaky_alpha, } if self._downsample: - self._conv1 = ConvBN(filters=self._filters, - kernel_size=(3, 3), - strides=(2, 2), - **self._dark_conv_args) - else: - self._conv1 = ConvBN(filters=self._filters, - kernel_size=(3, 3), - strides=(1, 1), - **self._dark_conv_args) - self._conv2 = ConvBN(filters=self._filters // self._filter_scale, - kernel_size=(1, 1), - strides=(1, 1), - **self._dark_conv_args) - - self._conv3 = ConvBN(filters=self._filters // self._filter_scale, - kernel_size=(1, 1), - strides=(1, 1), - **self._dark_conv_args) + if self._dilation_rate > 1: + dilation_rate = 1 + if self._dilation_rate // 2 > 0: + dilation_rate = self._dilation_rate // 2 + down_stride = 1 + else: + dilation_rate = 1 + down_stride = 2 - def call(self, inputs): - x = self._conv1(inputs) - y = self._conv2(x) - x = self._conv3(x) - return (x, y) + self._conv1 = ConvBN( + filters=self._filters, + kernel_size=(3, 3), + strides=down_stride, + dilation_rate=dilation_rate, + **dark_conv_args) + self._conv2 = ConvBN( + filters=self._filters // self._filter_scale, + kernel_size=(1, 1), + strides=(1, 1), + **dark_conv_args) -@tf.keras.utils.register_keras_serializable(package="yolo") -class CSPConnect(tf.keras.layers.Layer): - """Sister Layer to the CSPRoute layer. + self._conv3 = ConvBN( + filters=self._filters // self._filter_scale, + kernel_size=(1, 1), + strides=(1, 1), + **dark_conv_args) + + def call(self, inputs, training=None): + if self._downsample: + inputs = self._conv1(inputs) + y = self._conv2(inputs) + x = self._conv3(inputs) + return (x, y) - Merges the partial feature stacks generated by the CSPDownsampling layer, - and the finaly output of the residual stack. Suggested in the CSPNet paper. +@tf.keras.utils.register_keras_serializable(package='yolo') +class CSPConnect(tf.keras.layers.Layer): + """ + Sister Layer to the CSPRoute layer. Merges the partial feature stacks + generated by the CSPDownsampling layer, and the finaly output of the + residual stack. Suggested in the CSPNet paper. Cross Stage Partial networks (CSPNets) were proposed in: - [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, Ping-Yang - Chen, Jun-Wei Hsieh. 
+ [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, + Ping-Yang Chen, Jun-Wei Hsieh CSPNet: A New Backbone that can Enhance Learning Capability of CNN. - arXiv:1911.11929 + arXiv:1911.11929 """ def __init__(self, filters, filter_scale=2, - activation="mish", - kernel_initializer="glorot_uniform", - bias_initializer="zeros", - kernel_regularizer=None, + drop_final=False, + drop_first=False, + activation='mish', + kernel_size=(1, 1), + kernel_initializer='glorot_uniform', + bias_initializer='zeros', bias_regularizer=None, + kernel_regularizer=None, + dilation_rate=1, use_bn=True, use_sync_bn=False, norm_momentum=0.99, norm_epsilon=0.001, + leaky_alpha=0.1, **kwargs): - """Initializes CSPConnect. - + """ Args: - filters: integer for output depth, or the number of features to learn. + filters: integer for output depth, or the number of features to learn filter_scale: integer dicating (filters//2) or the number of filters in the partial feature stack. activation: string for activation function to use in layer. @@ -659,96 +680,112 @@ def __init__(self, bias_regularizer: string to indicate which function to use to regularizer bias. use_bn: boolean for whether to use batch normalization. - use_sync_bn: boolean for whether sync batch normalization. - norm_momentum: float for moment to use for batch normalization - norm_epsilon: float for batch normalization epsilon - **kwargs: Keyword Arguments + use_sync_bn: boolean for whether sync batch normalization statistics + of all batch norm layers to the models global + statistics (across all input batches). + norm_momentum: float for moment to use for batch normalization. + norm_epsilon: float for batch normalization epsilon. + **kwargs: Keyword Arguments. """ + super().__init__(**kwargs) - # layer params. + # layer params self._filters = filters self._filter_scale = filter_scale self._activation = activation - # Convoultion params. 
+ # convoultion params + self._kernel_size = kernel_size self._kernel_initializer = kernel_initializer self._bias_initializer = bias_initializer self._kernel_regularizer = kernel_regularizer self._bias_regularizer = bias_regularizer self._use_bn = use_bn self._use_sync_bn = use_sync_bn - self._norm_moment = norm_momentum + self._norm_momentum = norm_momentum self._norm_epsilon = norm_epsilon + self._drop_final = drop_final + self._drop_first = drop_first + self._leaky_alpha = leaky_alpha def build(self, input_shape): - self._dark_conv_args = { - "kernel_initializer": self._kernel_initializer, - "bias_initializer": self._bias_initializer, - "bias_regularizer": self._bias_regularizer, - "use_bn": self._use_bn, - "use_sync_bn": self._use_sync_bn, - "norm_momentum": self._norm_moment, - "norm_epsilon": self._norm_epsilon, - "activation": self._activation, - "kernel_regularizer": self._kernel_regularizer, + dark_conv_args = { + 'kernel_initializer': self._kernel_initializer, + 'bias_initializer': self._bias_initializer, + 'bias_regularizer': self._bias_regularizer, + 'use_bn': self._use_bn, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'activation': self._activation, + 'kernel_regularizer': self._kernel_regularizer, + 'leaky_alpha': self._leaky_alpha, } - self._conv1 = ConvBN(filters=self._filters // self._filter_scale, - kernel_size=(1, 1), - strides=(1, 1), - **self._dark_conv_args) + if not self._drop_first: + self._conv1 = ConvBN( + filters=self._filters // self._filter_scale, + kernel_size=self._kernel_size, + strides=(1, 1), + **dark_conv_args) self._concat = tf.keras.layers.Concatenate(axis=-1) - self._conv2 = ConvBN(filters=self._filters, - kernel_size=(1, 1), - strides=(1, 1), - **self._dark_conv_args) - def call(self, inputs): + if not self._drop_final: + self._conv2 = ConvBN( + filters=self._filters, + kernel_size=(1, 1), + strides=(1, 1), + **dark_conv_args) + + def call(self, inputs, training=None): x_prev, x_csp = inputs - x = self._conv1(x_prev) - x = self._concat([x, x_csp]) - x = self._conv2(x) + if not self._drop_first: + x_prev = self._conv1(x_prev) + x = self._concat([x_prev, x_csp]) + + # skipped if drop final is true + if not self._drop_final: + x = self._conv2(x) return x class CSPStack(tf.keras.layers.Layer): - """CSP full stack. - - Combines the route and the connect in case you dont want to just quickly wrap - an existing callable or list of layers to make it a cross stage partial. - Added for ease of use. you should be able to wrap any layer stack with a CSP - independent of wether it belongs to the Darknet family. if filter_scale = 2, - then the blocks in the stack passed into the the CSP stack should also have - filters = filters/filter_scale. - + """ + CSP full stack, combines the route and the connect in case you dont want to + jsut quickly wrap an existing callable or list of layers to + make it a cross stage partial. Added for ease of use. you should be able + to wrap any layer stack with a CSP independent of wether it belongs + to the Darknet family. 
if filter_scale = 2, then the blocks in the stack + passed into the the CSP stack should also have filters = filters/filter_scale Cross Stage Partial networks (CSPNets) were proposed in: - [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, Ping-Yang - Chen, Jun-Wei Hsieh + + [1] Chien-Yao Wang, Hong-Yuan Mark Liao, I-Hau Yeh, Yueh-Hua Wu, + Ping-Yang Chen, Jun-Wei Hsieh CSPNet: A New Backbone that can Enhance Learning Capability of CNN. - arXiv:1911.11929 + arXiv:1911.11929 """ def __init__(self, filters, model_to_wrap=None, filter_scale=2, - activation="mish", - kernel_initializer="glorot_uniform", - bias_initializer="zeros", - kernel_regularizer=None, + activation='mish', + kernel_initializer='glorot_uniform', + bias_initializer='zeros', bias_regularizer=None, + kernel_regularizer=None, downsample=True, use_bn=True, use_sync_bn=False, norm_momentum=0.99, norm_epsilon=0.001, **kwargs): - """Initializes CSPStack. - + """ Args: - filters: integer for output depth, or the number of features to learn. model_to_wrap: callable Model or a list of callable objects that will - process the output of CSPRoute, and be input into CSPConnect. List will - be called sequentially. + process the output of CSPRoute, and be input into CSPConnect. + list will be called sequentially. + downsample: down_sample the input. + filters: integer for output depth, or the number of features to learn. filter_scale: integer dicating (filters//2) or the number of filters in the partial feature stack. activation: string for activation function to use in layer. @@ -760,62 +797,793 @@ def __init__(self, regularizer weights. bias_regularizer: string to indicate which function to use to regularizer bias. - downsample: down_sample the input. - use_bn: boolean for whether to use batch normalization - use_sync_bn: boolean for whether sync batch normalization. - norm_momentum: float for moment to use for batch normalization - norm_epsilon: float for batch normalization epsilon - **kwargs: Keyword Arguments + use_bn: boolean for whether to use batch normalization. + use_sync_bn: boolean for whether sync batch normalization statistics + of all batch norm layers to the models global statistics + (across all input batches). + norm_momentum: float for moment to use for batch normalization. + norm_epsilon: float for batch normalization epsilon. + **kwargs: Keyword Arguments. + + Raises: + TypeError: model_to_wrap is not a layer or a list of layers """ + super().__init__(**kwargs) - # Layer params. + # layer params self._filters = filters self._filter_scale = filter_scale self._activation = activation self._downsample = downsample - # Convoultion params. 
+ # convoultion params self._kernel_initializer = kernel_initializer self._bias_initializer = bias_initializer self._kernel_regularizer = kernel_regularizer self._bias_regularizer = bias_regularizer self._use_bn = use_bn self._use_sync_bn = use_sync_bn - self._norm_moment = norm_momentum + self._norm_momentum = norm_momentum self._norm_epsilon = norm_epsilon - if model_to_wrap is not None: - if isinstance(model_to_wrap, Callable): - self._model_to_wrap = [model_to_wrap] - elif isinstance(model_to_wrap, List): - self._model_to_wrap = model_to_wrap - else: - raise ValueError("The input to the CSPStack must be a list of layers" - "that we can iterate through, or \n a callable") - else: + if model_to_wrap is None: self._model_to_wrap = [] + elif isinstance(model_to_wrap, Callable): + self._model_to_wrap = [model_to_wrap] + elif isinstance(model_to_wrap, List): + self._model_to_wrap = model_to_wrap + else: + raise TypeError( + 'the input to the CSPStack must be a list of layers that we can' + + 'iterate through, or \n a callable') def build(self, input_shape): - self._dark_conv_args = { - "filters": self._filters, - "filter_scale": self._filter_scale, - "activation": self._activation, - "kernel_initializer": self._kernel_initializer, - "bias_initializer": self._bias_initializer, - "bias_regularizer": self._bias_regularizer, - "use_bn": self._use_bn, - "use_sync_bn": self._use_sync_bn, - "norm_momentum": self._norm_moment, - "norm_epsilon": self._norm_epsilon, - "kernel_regularizer": self._kernel_regularizer, + dark_conv_args = { + 'filters': self._filters, + 'filter_scale': self._filter_scale, + 'activation': self._activation, + 'kernel_initializer': self._kernel_initializer, + 'bias_initializer': self._bias_initializer, + 'bias_regularizer': self._bias_regularizer, + 'use_bn': self._use_bn, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'kernel_regularizer': self._kernel_regularizer, } - self._route = CSPRoute(downsample=self._downsample, **self._dark_conv_args) - self._connect = CSPConnect(**self._dark_conv_args) - return + self._route = CSPRoute(downsample=self._downsample, **dark_conv_args) + self._connect = CSPConnect(**dark_conv_args) - def call(self, inputs): + def call(self, inputs, training=None): x, x_route = self._route(inputs) for layer in self._model_to_wrap: x = layer(x) x = self._connect([x, x_route]) return x + + +@tf.keras.utils.register_keras_serializable(package='yolo') +class PathAggregationBlock(tf.keras.layers.Layer): + + def __init__(self, + filters=1, + drop_final=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + bias_regularizer=None, + kernel_regularizer=None, + use_bn=True, + use_sync_bn=False, + inverted=False, + norm_momentum=0.99, + norm_epsilon=0.001, + activation='leaky', + leaky_alpha=0.1, + downsample=False, + upsample=False, + upsample_size=2, + **kwargs): + """ + Args: + filters: integer for output depth, or the number of features to learn. + drop_final: do not create the last convolution block. + kernel_initializer: string to indicate which function to use to initialize + weights. + bias_initializer: string to indicate which function to use to initialize + bias. + kernel_regularizer: string to indicate which function to use to + regularizer weights. + bias_regularizer: string to indicate which function to use to regularizer + bias. + use_bn: boolean for whether to use batch normalization. 
+ use_sync_bn: boolean for whether sync batch normalization statistics + of all batch norm layers to the models global statistics + (across all input batches). + inverted: boolean for inverting the order of the convolutions. + norm_momentum: float for moment to use for batch normalization. + norm_epsilon: float for batch normalization epsilon. + activation: string or None for activation function to use in layer, + if None activation is replaced by linear. + leaky_alpha: float to use as alpha if activation function is leaky. + downsample: `bool` for whehter to downwample and merge. + upsample: `bool` for whehter to upsample and merge. + upsample_size: `int` how much to upsample in order to match shapes. + **kwargs: Keyword Arguments. + """ + + # darkconv params + self._filters = filters + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._bias_regularizer = bias_regularizer + self._kernel_regularizer = kernel_regularizer + self._use_bn = use_bn + self._use_sync_bn = use_sync_bn + + # normal params + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + + # activation params + self._conv_activation = activation + self._leaky_alpha = leaky_alpha + self._downsample = downsample + self._upsample = upsample + self._upsample_size = upsample_size + self._drop_final = drop_final + + #block params + self._inverted = inverted + + super().__init__(**kwargs) + + def _build_regular(self, input_shape, kwargs): + if self._downsample: + self._conv = ConvBN( + filters=self._filters, + kernel_size=(3, 3), + strides=(2, 2), + padding='same', + **kwargs) + else: + self._conv = ConvBN( + filters=self._filters, + kernel_size=(1, 1), + strides=(1, 1), + padding='same', + **kwargs) + + if not self._drop_final: + self._conv_concat = ConvBN( + filters=self._filters, + kernel_size=(1, 1), + strides=(1, 1), + padding='same', + **kwargs) + + def _build_reversed(self, input_shape, kwargs): + if self._downsample: + self._conv_prev = ConvBN( + filters=self._filters, + kernel_size=(3, 3), + strides=(2, 2), + padding='same', + **kwargs) + else: + self._conv_prev = ConvBN( + filters=self._filters, + kernel_size=(1, 1), + strides=(1, 1), + padding='same', + **kwargs) + + self._conv_route = ConvBN( + filters=self._filters, + kernel_size=(1, 1), + strides=(1, 1), + padding='same', + **kwargs) + + if not self._drop_final: + self._conv_sync = ConvBN( + filters=self._filters, + kernel_size=(1, 1), + strides=(1, 1), + padding='same', + **kwargs) + + def build(self, input_shape): + dark_conv_args = { + 'kernel_initializer': self._kernel_initializer, + 'bias_initializer': self._bias_initializer, + 'bias_regularizer': self._bias_regularizer, + 'use_bn': self._use_bn, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'activation': self._conv_activation, + 'kernel_regularizer': self._kernel_regularizer, + 'leaky_alpha': self._leaky_alpha, + } + + if self._inverted: + self._build_reversed(input_shape, dark_conv_args) + else: + self._build_regular(input_shape, dark_conv_args) + + self._concat = tf.keras.layers.Concatenate() + super().build(input_shape) + + def _call_regular(self, inputs, training=None): + input_to_convolve, input_to_concat = inputs + x_prev = self._conv(input_to_convolve) + if self._upsample: + x_prev = spatial_transform_ops.nearest_upsampling(x_prev, + self._upsample_size) + x = self._concat([x_prev, input_to_concat]) + + # used in csp conversion + if not self._drop_final: + x = 
self._conv_concat(x) + return x_prev, x + + def _call_reversed(self, inputs, training=None): + x_route, x_prev = inputs + x_prev = self._conv_prev(x_prev) + if self._upsample: + x_prev = spatial_transform_ops.nearest_upsampling(x_prev, + self._upsample_size) + x_route = self._conv_route(x_route) + x = self._concat([x_route, x_prev]) + if not self._drop_final: + x = self._conv_sync(x) + return x_prev, x + + def call(self, inputs, training=None): + # done this way to prevent confusion in the auto graph + if self._inverted: + return self._call_reversed(inputs, training=training) + else: + return self._call_regular(inputs, training=training) + + +@tf.keras.utils.register_keras_serializable(package='yolo') +class SPP(tf.keras.layers.Layer): + """ + a non-agregated SPP layer that uses Pooling to gain more performance + """ + + def __init__(self, sizes, **kwargs): + self._sizes = list(reversed(sizes)) + if len(sizes) == 0: + raise ValueError('More than one maxpool should be specified in SSP block') + super().__init__(**kwargs) + + def build(self, input_shape): + maxpools = [] + for size in self._sizes: + maxpools.append( + tf.keras.layers.MaxPool2D( + pool_size=(size, size), + strides=(1, 1), + padding='same', + data_format=None)) + self._maxpools = maxpools + super().build(input_shape) + + def call(self, inputs, training=None): + outputs = [] + for maxpool in self._maxpools: + outputs.append(maxpool(inputs)) + outputs.append(inputs) + concat_output = tf.keras.layers.concatenate(outputs) + return concat_output + + def get_config(self): + layer_config = {'sizes': self._sizes} + layer_config.update(super().get_config()) + return layer_config + + +class SAM(tf.keras.layers.Layer): + """ + [1] Sanghyun Woo, Jongchan Park, Joon-Young Lee, In So Kweon + CBAM: Convolutional Block Attention Module. 
arXiv:1807.06521 + + implementation of the Spatial Attention Model (SAM) + """ + + def __init__(self, + use_pooling=False, + filter_match=False, + filters=1, + kernel_size=(1, 1), + strides=(1, 1), + padding='same', + dilation_rate=(1, 1), + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + bias_regularizer=None, + kernel_regularizer=None, + use_bn=True, + use_sync_bn=True, + norm_momentum=0.99, + norm_epsilon=0.001, + activation='sigmoid', + output_activation=None, + leaky_alpha=0.1, + **kwargs): + + # use_pooling + self._use_pooling = use_pooling + self._filters = filters + self._output_activation = output_activation + self._leaky_alpha = leaky_alpha + + self.dark_conv_args = { + 'kernel_size': kernel_size, + 'strides': strides, + 'padding': padding, + 'dilation_rate': dilation_rate, + 'kernel_initializer': kernel_initializer, + 'bias_initializer': bias_initializer, + 'bias_regularizer': bias_regularizer, + 'use_bn': use_bn, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'activation': activation, + 'kernel_regularizer': kernel_regularizer, + 'leaky_alpha': leaky_alpha + } + + super().__init__(**kwargs) + + def build(self, input_shape): + if self._filters == -1: + self._filters = input_shape[-1] + self._conv = ConvBN(filters=self._filters, **self.dark_conv_args) + if self._output_activation == 'leaky': + self._activation_fn = tf.keras.layers.LeakyReLU(alpha=self._leaky_alpha) + elif self._output_activation == 'mish': + self._activation_fn = lambda x: x * tf.math.tanh(tf.math.softplus(x)) + else: + self._activation_fn = tf_utils.get_activation(self._output_activation) + + def call(self, inputs, training=None): + if self._use_pooling: + depth_max = tf.reduce_max(inputs, axis=-1, keep_dims=True) + depth_avg = tf.reduce_mean(inputs, axis=-1, keep_dims=True) + input_maps = tf.concat([depth_avg, depth_max], axis=-1) + else: + input_maps = inputs + + attention_mask = self._conv(input_maps) + return self._activation_fn(inputs * attention_mask) + + +class CAM(tf.keras.layers.Layer): + """ + [1] Sanghyun Woo, Jongchan Park, Joon-Young Lee, In So Kweon + CBAM: Convolutional Block Attention Module. 
arXiv:1807.06521 + + implementation of the Channel Attention Model (CAM) + """ + + def __init__(self, + reduction_ratio=1.0, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + bias_regularizer=None, + kernel_regularizer=None, + use_bn=False, + use_sync_bn=False, + use_bias=False, + norm_momentum=0.99, + norm_epsilon=0.001, + mlp_activation='linear', + activation='sigmoid', + leaky_alpha=0.1, + **kwargs): + + self._reduction_ratio = reduction_ratio + + # use_pooling + if use_sync_bn: + self._bn = tf.keras.layers.experimental.SyncBatchNormalization + else: + self._bn = tf.keras.layers.BatchNormalization + + if not use_bn: + self._bn = Identity + self._bn_args = {} + else: + self._bn_args = { + 'momentum': norm_momentum, + 'epsilon': norm_epsilon, + } + + self._mlp_args = { + 'use_bias': use_bias, + 'kernel_initializer': kernel_initializer, + 'bias_initializer': bias_initializer, + 'bias_regularizer': bias_regularizer, + 'activation': mlp_activation, + 'kernel_regularizer': kernel_regularizer, + } + + self._leaky_alpha = leaky_alpha + self._activation = activation + + super().__init__(**kwargs) + + def build(self, input_shape): + self._filters = input_shape[-1] + + self._mlp = tf.keras.Sequential([ + tf.keras.layers.Dense(self._filters, **self._mlp_args), + self._bn(**self._bn_args), + tf.keras.layers.Dense( + int(self._filters * self._reduction_ratio), **self._mlp_args), + self._bn(**self._bn_args), + tf.keras.layers.Dense(self._filters, **self._mlp_args), + self._bn(**self._bn_args), + ]) + + if self._activation == 'leaky': + self._activation_fn = tf.keras.layers.LeakyReLU(alpha=self._leaky_alpha) + elif self._activation == 'mish': + self._activation_fn = lambda x: x * tf.math.tanh(tf.math.softplus(x)) + else: + self._activation_fn = tf_utils.get_activation(self._activation) + + def call(self, inputs, training=None): + depth_max = self._mlp(tf.reduce_max(inputs, axis=(1, 2))) + depth_avg = self._mlp(tf.reduce_mean(inputs, axis=(1, 2))) + channel_mask = self._activation_fn(depth_avg + depth_max) + + channel_mask = tf.expand_dims(channel_mask, axis=1) + attention_mask = tf.expand_dims(channel_mask, axis=1) + + return inputs * attention_mask + + +class CBAM(tf.keras.layers.Layer): + """ + [1] Sanghyun Woo, Jongchan Park, Joon-Young Lee, In So Kweon + CBAM: Convolutional Block Attention Module. 
arXiv:1807.06521 + + implementation of the Convolution Block Attention Module (CBAM) + """ + + def __init__(self, + use_pooling=False, + filters=1, + reduction_ratio=1.0, + kernel_size=(1, 1), + strides=(1, 1), + padding='same', + dilation_rate=(1, 1), + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + bias_regularizer=None, + kernel_regularizer=None, + use_bn=True, + use_sync_bn=False, + norm_momentum=0.99, + norm_epsilon=0.001, + mlp_activation=None, + activation='sigmoid', + leaky_alpha=0.1, + **kwargs): + + # use_pooling + + self._sam_args = { + 'use_pooling': use_pooling, + 'filters': filters, + 'kernel_size': kernel_size, + 'strides': strides, + 'padding': padding, + 'dilation_rate': dilation_rate, + } + + self._cam_args = { + 'reduction_ratio': reduction_ratio, + 'mlp_activation': mlp_activation + } + + self._common_args = { + 'kernel_initializer': kernel_initializer, + 'bias_initializer': bias_initializer, + 'bias_regularizer': bias_regularizer, + 'use_bn': use_bn, + 'use_sync_bn': use_sync_bn, + 'norm_momentum': norm_momentum, + 'norm_epsilon': norm_epsilon, + 'activation': activation, + 'kernel_regularizer': kernel_regularizer, + 'leaky_alpha': leaky_alpha + } + + self._cam_args.update(self._common_args) + self._sam_args.update(self._common_args) + super().__init__(**kwargs) + + def build(self, input_shape): + self._cam = CAM(**self._cam_args) + self._sam = SAM(**self._sam_args) + + def call(self, inputs, training=None): + return self._sam(self._cam(inputs)) + + +@tf.keras.utils.register_keras_serializable(package='yolo') +class DarkRouteProcess(tf.keras.layers.Layer): + """ + process darknet outputs and connect back bone to head more generalizably + Abstracts repetition of DarkConv objects that is common in YOLO. + + It is used like the following: + + x = ConvBN(1024, (3, 3), (1, 1))(x) + proc = DarkRouteProcess(filters = 1024, + repetitions = 3, + insert_spp = False)(x) + """ + def __init__( + self, + filters=2, + repetitions=2, + insert_spp=False, + insert_sam=False, + insert_cbam=False, + csp_stack=0, + csp_scale=2, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + bias_regularizer=None, + use_sync_bn=False, + kernel_regularizer=None, # default find where is it is stated + norm_momentum=0.99, + norm_epsilon=0.001, + block_invert=False, + activation='leaky', + leaky_alpha=0.1, + spp_keys=None, + **kwargs): + """ + Args: + filters: the number of filters to be used in all subsequent layers + filters should be the depth of the tensor input into this layer, + as no downsampling can be done within this layer object. + repetitions: number of times to repeat the processign nodes + for tiny: 1 repition, no spp allowed + for spp: insert_spp = True, and allow for 3+ repetitions + for regular: insert_spp = False, and allow for 3+ repetitions. + insert_spp: bool if true add the spatial pyramid pooling layer. + kernel_initializer: method to use to initializa kernel weights. + bias_initializer: method to use to initialize the bias of the conv + layers. + norm_momentum: batch norm parameter see TensorFlow documentation. + norm_epsilon: batch norm parameter see TensorFlow documentation. + activation: activation function to use in processing. + leaky_alpha: if leaky acitivation function, the alpha to use in + processing the relu input. 
+ + Returns: + callable tensorflow layer + + Raises: + None + """ + + super().__init__(**kwargs) + # darkconv params + self._filters = filters + self._use_sync_bn = use_sync_bn + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._bias_regularizer = bias_regularizer + self._kernel_regularizer = kernel_regularizer + + # normal params + self._norm_momentum = norm_momentum + self._norm_epsilon = norm_epsilon + + # activation params + self._activation = activation + self._leaky_alpha = leaky_alpha + + repetitions += (2 * int(insert_spp)) + if repetitions == 1: + block_invert = True + + self._repetitions = repetitions + self.layer_list, self.outputs = self._get_base_layers() + + if csp_stack > 0: + self._csp_scale = csp_scale + csp_stack += (2 * int(insert_spp)) + self._csp_filters = lambda x: x // csp_scale + self._convert_csp(self.layer_list, self.outputs, csp_stack) + block_invert = False + + self._csp_stack = csp_stack + + if block_invert: + self._conv1_filters = lambda x: x + self._conv2_filters = lambda x: x // 2 + self._conv1_kernel = (3, 3) + self._conv2_kernel = (1, 1) + else: + self._conv1_filters = lambda x: x // 2 + self._conv2_filters = lambda x: x + self._conv1_kernel = (1, 1) + self._conv2_kernel = (3, 3) + + # insert SPP will always add to the total nuber of layer, never replace + if insert_spp: + self._spp_keys = spp_keys if spp_keys is not None else [5, 9, 13] + self.layer_list = self._insert_spp(self.layer_list) + + if repetitions > 1: + self.outputs[-2] = True + + if insert_sam: + self.layer_list = self._insert_sam(self.layer_list, self.outputs) + self._repetitions += 1 + self.outputs[-1] = True + + def _get_base_layers(self): + layer_list = [] + outputs = [] + for i in range(self._repetitions): + layers = ['conv1'] * ((i + 1) % 2) + ['conv2'] * (i % 2) + layer_list.extend(layers) + outputs = [False] + outputs + return layer_list, outputs + + def _insert_spp(self, layer_list): + if len(layer_list) <= 3: + layer_list[1] = 'spp' + else: + layer_list[3] = 'spp' + return layer_list + + def _convert_csp(self, layer_list, outputs, csp_stack_size): + layer_list[0] = 'csp_route' + layer_list.insert(csp_stack_size - 1, 'csp_connect') + outputs.insert(csp_stack_size - 1, False) + return layer_list, outputs + + def _insert_sam(self, layer_list, outputs): + if len(layer_list) >= 2 and layer_list[-2] != 'spp': + layer_list.insert(-2, 'sam') + outputs.insert(-1, True) + else: + layer_list.insert(-1, 'sam') + outputs.insert(-1, False) + return layer_list + + def _conv1(self, filters, kwargs, csp=False): + if csp: + filters_ = self._csp_filters + else: + filters_ = self._conv1_filters + + x1 = ConvBN( + filters=filters_(filters), + kernel_size=self._conv1_kernel, + strides=(1, 1), + padding='same', + use_bn=True, + **kwargs) + return x1 + + def _conv2(self, filters, kwargs, csp=False): + if csp: + filters_ = self._csp_filters + else: + filters_ = self._conv2_filters + + x1 = ConvBN( + filters=filters_(filters), + kernel_size=self._conv2_kernel, + strides=(1, 1), + padding='same', + use_bn=True, + **kwargs) + return x1 + + def _csp_route(self, filters, kwargs): + x1 = CSPRoute( + filters=filters, + filter_scale=self._csp_scale, + downsample=False, + **kwargs) + return x1 + + def _csp_connect(self, filters, kwargs): + x1 = CSPConnect(filters=filters, drop_final=True, drop_first=True, **kwargs) + return x1 + + def _spp(self, filters, kwargs): + x1 = SPP(self._spp_keys) + return x1 + + def _sam(self, filters, kwargs): + x1 = SAM(filters=-1, 
use_pooling=False, use_bn=True, **kwargs) + return x1 + + def build(self, input_shape): + dark_conv_args = { + 'activation': self._activation, + 'kernel_initializer': self._kernel_initializer, + 'bias_initializer': self._bias_initializer, + 'bias_regularizer': self._bias_regularizer, + 'use_sync_bn': self._use_sync_bn, + 'norm_momentum': self._norm_momentum, + 'norm_epsilon': self._norm_epsilon, + 'kernel_regularizer': self._kernel_regularizer, + 'leaky_alpha': self._leaky_alpha, + } + + csp = False + self.layers = [] + for layer in self.layer_list: + if layer == 'csp_route': + self.layers.append(self._csp_route(self._filters, dark_conv_args)) + csp = True + elif layer == 'csp_connect': + self.layers.append(self._csp_connect(self._filters, dark_conv_args)) + csp = False + elif layer == 'conv1': + self.layers.append(self._conv1(self._filters, dark_conv_args, csp=csp)) + elif layer == 'conv2': + self.layers.append(self._conv2(self._filters, dark_conv_args, csp=csp)) + elif layer == 'spp': + self.layers.append(self._spp(self._filters, dark_conv_args)) + elif layer == 'sam': + self.layers.append(self._sam(-1, _args)) + + self._lim = len(self.layers) + super().build(input_shape) + + def _call_regular(self, inputs, training=None): + # check efficiency + x = inputs + x_prev = x + output_prev = True + + for i, (layer, output) in enumerate(zip(self.layers, self.outputs)): + if output_prev: + x_prev = x + x = layer(x) + output_prev = output + return x_prev, x + + def _call_csp(self, inputs, training=None): + # check efficiency + x = inputs + x_prev = x + output_prev = True + x_route = None + + for i, (layer, output) in enumerate(zip(self.layers, self.outputs)): + if output_prev: + x_prev = x + if i == 0: + x, x_route = layer(x) + elif i == self._csp_stack - 1: + x = layer([x, x_route]) + else: + x = layer(x) + output_prev = output + return x_prev, x + + def call(self, inputs, training=None): + if self._csp_stack > 0: + return self._call_csp(inputs, training=training) + else: + return self._call_regular(inputs) diff --git a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py index 5df28a4f3fb..6664a80e722 100644 --- a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py +++ b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks_test.py @@ -13,18 +13,17 @@ # limitations under the License. 
# Lint as: python3 - -from absl.testing import parameterized -import numpy as np import tensorflow as tf +import numpy as np +from absl.testing import parameterized from official.vision.beta.projects.yolo.modeling.layers import nn_blocks class CSPConnectTest(tf.test.TestCase, parameterized.TestCase): - @parameterized.named_parameters(("same", 224, 224, 64, 1), - ("downsample", 224, 224, 64, 2)) + @parameterized.named_parameters(('same', 224, 224, 64, 1), + ('downsample', 224, 224, 64, 2)) def test_pass_through(self, width, height, filters, mod): x = tf.keras.Input(shape=(width, height, filters)) test_layer = nn_blocks.CSPRoute(filters=filters, filter_scale=mod) @@ -38,8 +37,8 @@ def test_pass_through(self, width, height, filters, mod): [None, np.ceil(width // 2), np.ceil(height // 2), (filters)]) - @parameterized.named_parameters(("same", 224, 224, 64, 1), - ("downsample", 224, 224, 128, 2)) + @parameterized.named_parameters(('same', 224, 224, 64, 1), + ('downsample', 224, 224, 128, 2)) def test_gradient_pass_though(self, filters, width, height, mod): loss = tf.keras.losses.MeanSquaredError() optimizer = tf.keras.optimizers.SGD() @@ -49,10 +48,11 @@ def test_gradient_pass_though(self, filters, width, height, mod): init = tf.random_normal_initializer() x = tf.Variable( initial_value=init(shape=(1, width, height, filters), dtype=tf.float32)) - y = tf.Variable(initial_value=init(shape=(1, int(np.ceil(width // 2)), - int(np.ceil(height // 2)), - filters), - dtype=tf.float32)) + y = tf.Variable( + initial_value=init( + shape=(1, int(np.ceil(width // 2)), int(np.ceil(height // 2)), + filters), + dtype=tf.float32)) with tf.GradientTape() as tape: x_hat, x_prev = test_layer(x) @@ -66,12 +66,12 @@ def test_gradient_pass_though(self, filters, width, height, mod): class CSPRouteTest(tf.test.TestCase, parameterized.TestCase): - @parameterized.named_parameters(("same", 224, 224, 64, 1), - ("downsample", 224, 224, 64, 2)) + @parameterized.named_parameters(('same', 224, 224, 64, 1), + ('downsample', 224, 224, 64, 2)) def test_pass_through(self, width, height, filters, mod): x = tf.keras.Input(shape=(width, height, filters)) test_layer = nn_blocks.CSPRoute(filters=filters, filter_scale=mod) - outx, _ = test_layer(x) + outx, px = test_layer(x) print(outx) print(outx.shape.as_list()) self.assertAllEqual( @@ -79,8 +79,8 @@ def test_pass_through(self, width, height, filters, mod): [None, np.ceil(width // 2), np.ceil(height // 2), (filters / mod)]) - @parameterized.named_parameters(("same", 224, 224, 64, 1), - ("downsample", 224, 224, 128, 2)) + @parameterized.named_parameters(('same', 224, 224, 64, 1), + ('downsample', 224, 224, 128, 2)) def test_gradient_pass_though(self, filters, width, height, mod): loss = tf.keras.losses.MeanSquaredError() optimizer = tf.keras.optimizers.SGD() @@ -90,10 +90,11 @@ def test_gradient_pass_though(self, filters, width, height, mod): init = tf.random_normal_initializer() x = tf.Variable( initial_value=init(shape=(1, width, height, filters), dtype=tf.float32)) - y = tf.Variable(initial_value=init(shape=(1, int(np.ceil(width // 2)), - int(np.ceil(height // 2)), - filters), - dtype=tf.float32)) + y = tf.Variable( + initial_value=init( + shape=(1, int(np.ceil(width // 2)), int(np.ceil(height // 2)), + filters), + dtype=tf.float32)) with tf.GradientTape() as tape: x_hat, x_prev = test_layer(x) @@ -107,11 +108,11 @@ def test_gradient_pass_though(self, filters, width, height, mod): class CSPStackTest(tf.test.TestCase, parameterized.TestCase): - def build_layer( - self, layer_type, 
filters, filter_scale, count, stack_type, downsample): + def build_layer(self, layer_type, filters, filter_scale, count, stack_type, + downsample): if stack_type is not None: layers = [] - if layer_type == "residual": + if layer_type == 'residual': for _ in range(count): layers.append( nn_blocks.DarkResidual( @@ -120,7 +121,7 @@ def build_layer( for _ in range(count): layers.append(nn_blocks.ConvBN(filters=filters)) - if stack_type == "model": + if stack_type == 'model': layers = tf.keras.Sequential(layers=layers) else: layers = None @@ -133,10 +134,10 @@ def build_layer( return stack @parameterized.named_parameters( - ("no_stack", 224, 224, 64, 2, "residual", None, 0, True), - ("residual_stack", 224, 224, 64, 2, "residual", "list", 2, True), - ("conv_stack", 224, 224, 64, 2, "conv", "list", 3, False), - ("callable_no_scale", 224, 224, 64, 1, "residual", "model", 5, False)) + ('no_stack', 224, 224, 64, 2, 'residual', None, 0, True), + ('residual_stack', 224, 224, 64, 2, 'residual', 'list', 2, True), + ('conv_stack', 224, 224, 64, 2, 'conv', 'list', 3, False), + ('callable_no_scale', 224, 224, 64, 1, 'residual', 'model', 5, False)) def test_pass_through(self, width, height, filters, mod, layer_type, stack_type, count, downsample): x = tf.keras.Input(shape=(width, height, filters)) @@ -152,10 +153,10 @@ def test_pass_through(self, width, height, filters, mod, layer_type, self.assertAllEqual(outx.shape.as_list(), [None, width, height, filters]) @parameterized.named_parameters( - ("no_stack", 224, 224, 64, 2, "residual", None, 0, True), - ("residual_stack", 224, 224, 64, 2, "residual", "list", 2, True), - ("conv_stack", 224, 224, 64, 2, "conv", "list", 3, False), - ("callable_no_scale", 224, 224, 64, 1, "residual", "model", 5, False)) + ('no_stack', 224, 224, 64, 2, 'residual', None, 0, True), + ('residual_stack', 224, 224, 64, 2, 'residual', 'list', 2, True), + ('conv_stack', 224, 224, 64, 2, 'conv', 'list', 3, False), + ('callable_no_scale', 224, 224, 64, 1, 'residual', 'model', 5, False)) def test_gradient_pass_though(self, width, height, filters, mod, layer_type, stack_type, count, downsample): loss = tf.keras.losses.MeanSquaredError() @@ -188,10 +189,10 @@ def test_gradient_pass_though(self, width, height, filters, mod, layer_type, class ConvBNTest(tf.test.TestCase, parameterized.TestCase): @parameterized.named_parameters( - ("valid", (3, 3), "valid", (1, 1)), ("same", (3, 3), "same", (1, 1)), - ("downsample", (3, 3), "same", (2, 2)), ("test", (1, 1), "valid", (1, 1))) + ('valid', (3, 3), 'valid', (1, 1)), ('same', (3, 3), 'same', (1, 1)), + ('downsample', (3, 3), 'same', (2, 2)), ('test', (1, 1), 'valid', (1, 1))) def test_pass_through(self, kernel_size, padding, strides): - if padding == "same": + if padding == 'same': pad_const = 1 else: pad_const = 0 @@ -212,16 +213,16 @@ def test_pass_through(self, kernel_size, padding, strides): print(test) self.assertAllEqual(outx.shape.as_list(), test) - @parameterized.named_parameters(("filters", 3)) + @parameterized.named_parameters(('filters', 3)) def test_gradient_pass_though(self, filters): loss = tf.keras.losses.MeanSquaredError() optimizer = tf.keras.optimizers.SGD() - with tf.device("/CPU:0"): - test_layer = nn_blocks.ConvBN(filters, kernel_size=(3, 3), padding="same") + with tf.device('/CPU:0'): + test_layer = nn_blocks.ConvBN(filters, kernel_size=(3, 3), padding='same') init = tf.random_normal_initializer() - x = tf.Variable(initial_value=init(shape=(1, 224, 224, - 3), dtype=tf.float32)) + x = tf.Variable( + initial_value=init(shape=(1, 
224, 224, 3), dtype=tf.float32)) y = tf.Variable( initial_value=init(shape=(1, 224, 224, filters), dtype=tf.float32)) @@ -235,9 +236,9 @@ def test_gradient_pass_though(self, filters): class DarkResidualTest(tf.test.TestCase, parameterized.TestCase): - @parameterized.named_parameters(("same", 224, 224, 64, False), - ("downsample", 223, 223, 32, True), - ("oddball", 223, 223, 32, False)) + @parameterized.named_parameters(('same', 224, 224, 64, False), + ('downsample', 223, 223, 32, True), + ('oddball', 223, 223, 32, False)) def test_pass_through(self, width, height, filters, downsample): mod = 1 if downsample: @@ -252,9 +253,9 @@ def test_pass_through(self, width, height, filters, downsample): [None, np.ceil(width / mod), np.ceil(height / mod), filters]) - @parameterized.named_parameters(("same", 64, 224, 224, False), - ("downsample", 32, 223, 223, True), - ("oddball", 32, 223, 223, False)) + @parameterized.named_parameters(('same', 64, 224, 224, False), + ('downsample', 32, 223, 223, True), + ('oddball', 32, 223, 223, False)) def test_gradient_pass_though(self, filters, width, height, downsample): loss = tf.keras.losses.MeanSquaredError() optimizer = tf.keras.optimizers.SGD() @@ -268,10 +269,11 @@ def test_gradient_pass_though(self, filters, width, height, downsample): init = tf.random_normal_initializer() x = tf.Variable( initial_value=init(shape=(1, width, height, filters), dtype=tf.float32)) - y = tf.Variable(initial_value=init(shape=(1, int(np.ceil(width / mod)), - int(np.ceil(height / mod)), - filters), - dtype=tf.float32)) + y = tf.Variable( + initial_value=init( + shape=(1, int(np.ceil(width / mod)), int(np.ceil(height / mod)), + filters), + dtype=tf.float32)) with tf.GradientTape() as tape: x_hat = test_layer(x) @@ -281,5 +283,102 @@ def test_gradient_pass_though(self, filters, width, height, downsample): self.assertNotIn(None, grad) -if __name__ == "__main__": + +class DarkSppTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters(('RouteProcessSpp', 224, 224, 3, [5, 9, 13]), + ('test1', 300, 300, 10, [2, 3, 4, 5]), + ('test2', 256, 256, 5, [10])) + def test_pass_through(self, width, height, channels, sizes): + x = tf.keras.Input(shape=(width, height, channels)) + test_layer = nn_blocks.SPP(sizes=sizes) + outx = test_layer(x) + self.assertAllEqual(outx.shape.as_list(), + [None, width, height, channels * (len(sizes) + 1)]) + return + + @parameterized.named_parameters(('RouteProcessSpp', 224, 224, 3, [5, 9, 13]), + ('test1', 300, 300, 10, [2, 3, 4, 5]), + ('test2', 256, 256, 5, [10])) + def test_gradient_pass_though(self, width, height, channels, sizes): + loss = tf.keras.losses.MeanSquaredError() + optimizer = tf.keras.optimizers.SGD() + test_layer = nn_blocks.SPP(sizes=sizes) + + init = tf.random_normal_initializer() + x = tf.Variable( + initial_value=init( + shape=(1, width, height, channels), dtype=tf.float32)) + y = tf.Variable( + initial_value=init( + shape=(1, width, height, channels * (len(sizes) + 1)), + dtype=tf.float32)) + + with tf.GradientTape() as tape: + x_hat = test_layer(x) + grad_loss = loss(x_hat, y) + grad = tape.gradient(grad_loss, test_layer.trainable_variables) + optimizer.apply_gradients(zip(grad, test_layer.trainable_variables)) + + self.assertNotIn(None, grad) + return + + +class DarkRouteProcessTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + ('test1', 224, 224, 64, 7, False), ('test2', 223, 223, 32, 3, False), + ('tiny', 223, 223, 16, 1, False), ('spp', 224, 224, 64, 7, False)) + def 
test_pass_through(self, width, height, filters, repetitions, spp): + x = tf.keras.Input(shape=(width, height, filters)) + test_layer = nn_blocks.DarkRouteProcess( + filters=filters, repetitions=repetitions, insert_spp=spp) + outx = test_layer(x) + self.assertEqual(len(outx), 2, msg='len(outx) != 2') + if repetitions == 1: + filter_y1 = filters + else: + filter_y1 = filters // 2 + self.assertAllEqual(outx[1].shape.as_list(), [None, width, height, filter_y1]) + self.assertAllEqual( + filters % 2, + 0, + msg='Output of a DarkRouteProcess layer has an odd number of filters') + self.assertAllEqual(outx[0].shape.as_list(), [None, width, height, filters]) + + @parameterized.named_parameters( + ('test1', 224, 224, 64, 7, False), ('test2', 223, 223, 32, 3, False), + ('tiny', 223, 223, 16, 1, False), ('spp', 224, 224, 64, 7, False)) + def test_gradient_pass_though(self, width, height, filters, repetitions, spp): + loss = tf.keras.losses.MeanSquaredError() + optimizer = tf.keras.optimizers.SGD() + test_layer = nn_blocks.DarkRouteProcess( + filters=filters, repetitions=repetitions, insert_spp=spp) + + if repetitions == 1: + filter_y1 = filters + else: + filter_y1 = filters // 2 + + init = tf.random_normal_initializer() + x = tf.Variable( + initial_value=init(shape=(1, width, height, filters), dtype=tf.float32)) + y_0 = tf.Variable( + initial_value=init(shape=(1, width, height, filters), dtype=tf.float32)) + y_1 = tf.Variable( + initial_value=init(shape=(1, width, height, filter_y1), dtype=tf.float32)) + + with tf.GradientTape() as tape: + x_hat_0, x_hat_1 = test_layer(x) + grad_loss_0 = loss(x_hat_0, y_0) + grad_loss_1 = loss(x_hat_1, y_1) + grad = tape.gradient([grad_loss_0, grad_loss_1], + test_layer.trainable_variables) + optimizer.apply_gradients(zip(grad, test_layer.trainable_variables)) + + self.assertNotIn(None, grad) + return + + +if __name__ == '__main__': tf.test.main() From 725b8c8c85d36fbee58f0025364f4e1acdcb0c94 Mon Sep 17 00:00:00 2001 From: Anirudh Vegesana Date: Wed, 26 May 2021 23:39:14 -0400 Subject: [PATCH 09/10] disclaimer (#10020) Co-authored-by: Vishnu Banna <43182884+vishnubanna@users.noreply.github.com> --- official/vision/beta/projects/yolo/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/official/vision/beta/projects/yolo/README.md b/official/vision/beta/projects/yolo/README.md index 0a1e27fbe90..166ac8d9842 100644 --- a/official/vision/beta/projects/yolo/README.md +++ b/official/vision/beta/projects/yolo/README.md @@ -74,3 +74,7 @@ head could be connected to a new, more powerful backbone if a person chose to. [![TensorFlow 2.2](https://img.shields.io/badge/TensorFlow-2.2-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0) [![Python 3.8](https://img.shields.io/badge/Python-3.8-3776AB)](https://www.python.org/downloads/release/python-380/) + + + +DISCLAIMER: this YOLO implementation is still under development. No support will be provided during the development phase. 
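For orientation on the nn_blocks rework in PATCH 08/10, the sketch below shows how two of the refactored layers are typically exercised, mirroring the shape checks in `nn_blocks_test.py`. It is illustrative only and not part of the patch series; the input size and layer arguments are example values chosen for the demonstration.

```python
# Illustrative usage sketch, not part of the patch series.
# Input size and layer arguments are example values.
import tensorflow as tf
from official.vision.beta.projects.yolo.modeling.layers import nn_blocks

x = tf.keras.Input(shape=(224, 224, 64))

# SPP concatenates the input with one max-pooled copy per window size,
# so the channel count grows to 64 * (len(sizes) + 1) = 256 here.
spp_out = nn_blocks.SPP(sizes=[5, 9, 13])(x)

# DarkRouteProcess returns a (route, output) pair; with repetitions > 1 the
# second tensor carries filters // 2 channels, as asserted in the unit tests.
route, out = nn_blocks.DarkRouteProcess(
    filters=64, repetitions=3, insert_spp=False)(x)

print(spp_out.shape, route.shape, out.shape)
# expected: (None, 224, 224, 256) (None, 224, 224, 64) (None, 224, 224, 32)
```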
From 9dd1c2c0aa2d4f4508543e9c34c2aa76678f8706 Mon Sep 17 00:00:00 2001 From: anivegesana Date: Thu, 27 May 2021 19:07:15 -0400 Subject: [PATCH 10/10] Fix some PyLint errors --- .../projects/yolo/modeling/decoders/yolo_decoder.py | 12 ++++++------ .../beta/projects/yolo/modeling/layers/nn_blocks.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py b/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py index 1dbaae6ebf1..ae66e3797cf 100644 --- a/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py +++ b/official/vision/beta/projects/yolo/modeling/decoders/yolo_decoder.py @@ -331,13 +331,13 @@ def get_raw_depths(self, minimum_depth, inputs): Args: minimum_depth: `int` depth of the smallest branch of the FPN. - inputs: `dict[str, tf.InputSpec]` of the shape of input args as a dictionary of - lists. + inputs: `dict[str, tf.InputSpec]` of the shape of input args as a + dictionary of lists. Returns: The unscaled depths of the FPN branches. """ - + depths = [] if len(inputs.keys()) > 3 or self._fpn_filter_scale > 1: for i in range(self._min_level, self._max_level + 1): @@ -386,8 +386,8 @@ def __init__(self, kernel_regularizer=None, bias_regularizer=None, **kwargs): - """Yolo Decoder initialization function. A unified model that ties all decoder - components into a conditionally build YOLO decder. + """Yolo Decoder initialization function. A unified model that ties all + decoder components into a conditionally build YOLO decoder. Args: input_specs: `dict[str, tf.InputSpec]`: input specs of each of the inputs @@ -409,7 +409,7 @@ def __init__(self, zero. kernel_initializer: kernel_initializer for convolutional layers. kernel_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. - bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2d. + bias_regularizer: tf.keras.regularizers.Regularizer object for Conv2D. **kwargs: keyword arguments to be passed. """ diff --git a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py index 9897def3ad3..02895ff3db4 100644 --- a/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py +++ b/official/vision/beta/projects/yolo/modeling/layers/nn_blocks.py @@ -1152,8 +1152,8 @@ def build(self, input_shape): def call(self, inputs, training=None): if self._use_pooling: - depth_max = tf.reduce_max(inputs, axis=-1, keep_dims=True) - depth_avg = tf.reduce_mean(inputs, axis=-1, keep_dims=True) + depth_max = tf.reduce_max(inputs, axis=-1, keepdims=True) + depth_avg = tf.reduce_mean(inputs, axis=-1, keepdims=True) input_maps = tf.concat([depth_avg, depth_max], axis=-1) else: input_maps = inputs @@ -1545,7 +1545,7 @@ def build(self, input_shape): elif layer == 'spp': self.layers.append(self._spp(self._filters, dark_conv_args)) elif layer == 'sam': - self.layers.append(self._sam(-1, _args)) + self.layers.append(self._sam(-1, dark_conv_args)) self._lim = len(self.layers) super().build(input_shape)
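The `keep_dims` to `keepdims` rename in PATCH 10/10 matters because the TF2 `tf.reduce_max`/`tf.reduce_mean` APIs only accept `keepdims`. As a quick illustrative check (not part of the patch), the snippet below reproduces the channel-wise pooling that `SAM.call` applies when `use_pooling=True`, using the corrected argument; the feature-map shape is an example value.

```python
# Illustrative only: the channel pooling used by SAM when use_pooling=True,
# written with the TF2 `keepdims` argument that PATCH 10/10 switches to.
import tensorflow as tf

x = tf.random.normal([1, 8, 8, 32])                     # example feature map
depth_max = tf.reduce_max(x, axis=-1, keepdims=True)    # (1, 8, 8, 1)
depth_avg = tf.reduce_mean(x, axis=-1, keepdims=True)   # (1, 8, 8, 1)
pooled = tf.concat([depth_avg, depth_max], axis=-1)     # (1, 8, 8, 2)
print(pooled.shape)
```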