Improve and regenerate terraform templates #2438

Open · wants to merge 4 commits into main
@@ -152,6 +152,7 @@ resource "google_dataflow_job" "generated" {
max_workers = var.max_workers
name = var.name
network = var.network
+ on_delete = var.on_delete
service_account_email = var.service_account_email
skip_wait_on_job_termination = var.skip_wait_on_job_termination
subnetwork = var.subnetwork
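The `on_delete` argument that these regenerated templates now forward to `google_dataflow_job` controls whether the running pipeline is drained or cancelled when the Terraform resource is destroyed. The matching input variable is not shown in this diff; a minimal sketch of what such a declaration could look like (name, default, and validation are assumptions for illustration only):

```hcl
# Assumed companion declaration; not part of this PR's visible diff.
# The google_dataflow_job resource accepts "drain" or "cancel" for on_delete.
variable "on_delete" {
  type        = string
  description = "Action taken on the running job when the resource is deleted: \"drain\" or \"cancel\"."
  default     = "drain"

  validation {
    condition     = contains(["drain", "cancel"], var.on_delete)
    error_message = "The on_delete value must be either \"drain\" or \"cancel\"."
  }
}
```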
@@ -106,6 +106,7 @@ variable "max_workers" {

variable "name" {
type = string
description = "A unique name for the resource, required by Dataflow."
}

variable "network" {
@@ -183,6 +184,7 @@ resource "google_dataflow_flex_template_job" "generated" {
name = var.name
network = var.network
num_workers = var.num_workers
+ on_delete = var.on_delete
sdk_container_image = var.sdk_container_image
service_account_email = var.service_account_email
skip_wait_on_job_termination = var.skip_wait_on_job_termination
7 changes: 4 additions & 3 deletions v1/terraform/Bulk_Compress_GCS_Files/dataflow_job.tf
@@ -35,19 +35,19 @@ variable "region" {

variable "inputFilePattern" {
type = string
description = "The Cloud Storage location of the files you'd like to process. (Example: gs://your-bucket/your-files/*.txt)"
description = "The Cloud Storage location of the files you'd like to process. For example, `gs://your-bucket/your-files/*.txt`"

}

variable "outputDirectory" {
type = string
description = "The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. (Example: gs://your-bucket/your-path)"
description = "The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. For example, `gs://your-bucket/your-path`"

}

variable "outputFailureFile" {
type = string
description = "The error log output file to use for write failures that occur during compression. The contents will be one line for each file which failed compression. Note that this parameter will allow the pipeline to continue processing in the event of a failure. (Example: gs://your-bucket/compressed/failed.csv)"
description = "The error log output file to use for write failures that occur during compression. The contents will be one line for each file which failed compression. Note that this parameter will allow the pipeline to continue processing in the event of a failure. For example, `gs://your-bucket/compressed/failed.csv`"

}

@@ -176,6 +176,7 @@ resource "google_dataflow_job" "generated" {
max_workers = var.max_workers
name = var.name
network = var.network
+ on_delete = var.on_delete
service_account_email = var.service_account_email
skip_wait_on_job_termination = var.skip_wait_on_job_termination
subnetwork = var.subnetwork
7 changes: 4 additions & 3 deletions v1/terraform/Bulk_Decompress_GCS_Files/dataflow_job.tf
@@ -35,19 +35,19 @@ variable "region" {

variable "inputFilePattern" {
type = string
description = "The Cloud Storage location of the files you'd like to process. (Example: gs://your-bucket/your-files/*.gz)"
description = "The Cloud Storage location of the files you'd like to process. For example, `gs://your-bucket/your-files/*.gz`"

}

variable "outputDirectory" {
type = string
description = "The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. (Example: gs://your-bucket/decompressed/)"
description = "The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. For example, `gs://your-bucket/decompressed/`"

}

variable "outputFailureFile" {
type = string
description = "The output file to write failures to during the decompression process. If there are no failures, the file will still be created but will be empty. The contents will be one line for each file which failed decompression in CSV format (Filename, Error). Note that this parameter will allow the pipeline to continue processing in the event of a failure. (Example: gs://your-bucket/decompressed/failed.csv)"
description = "The output file to write failures to during the decompression process. If there are no failures, the file will still be created but will be empty. The contents will be one line for each file which failed decompression in CSV format (Filename, Error). Note that this parameter will allow the pipeline to continue processing in the event of a failure. For example, `gs://your-bucket/decompressed/failed.csv`"

}

@@ -162,6 +162,7 @@ resource "google_dataflow_job" "generated" {
max_workers = var.max_workers
name = var.name
network = var.network
+ on_delete = var.on_delete
service_account_email = var.service_account_email
skip_wait_on_job_termination = var.skip_wait_on_job_termination
subnetwork = var.subnetwork
7 changes: 4 additions & 3 deletions v1/terraform/Cassandra_To_Cloud_Bigtable/dataflow_job.tf
@@ -41,7 +41,7 @@ variable "cassandraHosts" {

variable "cassandraPort" {
type = number
description = "The TCP port to use to reach Apache Cassandra on the nodes. The default value is 9042."
description = "The TCP port to use to reach Apache Cassandra on the nodes. The default value is `9042`."
default = null
}

@@ -77,13 +77,13 @@ variable "bigtableTableId" {

variable "defaultColumnFamily" {
type = string
description = "The name of the column family of the Bigtable table. The default value is default."
description = "The name of the column family of the Bigtable table. The default value is `default`."
default = null
}

variable "rowKeySeparator" {
type = string
description = "The separator used to build row-keys. The default value is '#'."
description = "The separator used to build row-keys. The default value is `#`."
default = null
}

@@ -227,6 +227,7 @@ resource "google_dataflow_job" "generated" {
max_workers = var.max_workers
name = var.name
network = var.network
+ on_delete = var.on_delete
service_account_email = var.service_account_email
skip_wait_on_job_termination = var.skip_wait_on_job_termination
subnetwork = var.subnetwork
15 changes: 8 additions & 7 deletions v1/terraform/Cloud_BigQuery_to_Cloud_Datastore/dataflow_job.tf
@@ -35,7 +35,7 @@ variable "region" {

variable "readQuery" {
type = string
description = "SQL query in standard SQL to pull data from BigQuery"
description = "A BigQuery SQL query that extracts data from the source. For example, `select * from dataset1.sample_table`."

}

@@ -47,13 +47,13 @@ variable "readIdColumn" {

variable "invalidOutputPath" {
type = string
description = "Cloud Storage path where to write BigQuery rows that cannot be converted to target entities. (Example: gs://your-bucket/your-path)"
description = "Cloud Storage path where to write BigQuery rows that cannot be converted to target entities. For example, `gs://your-bucket/your-path`"
default = null
}

variable "datastoreWriteProjectId" {
type = string
description = "The Google Cloud project ID of where to write Datastore entities"
description = "The ID of the Google Cloud project to write the Datastore entities to."

}

@@ -70,14 +70,14 @@ variable "datastoreWriteNamespace" {
}

variable "datastoreHintNumWorkers" {
- type = string
- description = "Hint for the expected number of workers in the Datastore ramp-up throttling step. Defaults to: 500."
+ type = number
+ description = "Hint for the expected number of workers in the Datastore ramp-up throttling step. Defaults to `500`."
default = null
}

variable "errorWritePath" {
type = string
description = "The error log output folder to use for write failures that occur during processing. (Example: gs://your-bucket/errors/)"
description = "The error log output file to use for write failures that occur during processing. For example, `gs://your-bucket/errors/`"

}

@@ -184,7 +184,7 @@ resource "google_dataflow_job" "generated" {
datastoreWriteProjectId = var.datastoreWriteProjectId
datastoreWriteEntityKind = var.datastoreWriteEntityKind
datastoreWriteNamespace = var.datastoreWriteNamespace
- datastoreHintNumWorkers = var.datastoreHintNumWorkers
+ datastoreHintNumWorkers = tostring(var.datastoreHintNumWorkers)
errorWritePath = var.errorWritePath
}
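Classic Dataflow templates receive every entry of the `parameters` map as a string, which is why the now-numeric `datastoreHintNumWorkers` input is wrapped in `tostring()` above. A self-contained sketch of the pattern, with illustrative resource arguments that are not taken from this PR:

```hcl
# Hypothetical, simplified example of passing a numeric variable to a classic template.
variable "hint_num_workers" {
  type    = number
  default = 500
}

resource "google_dataflow_job" "example" {
  name              = "bq-to-datastore-example"
  template_gcs_path = "gs://dataflow-templates-us-central1/latest/Cloud_BigQuery_to_Cloud_Datastore"
  temp_gcs_location = "gs://example-bucket/tmp"

  parameters = {
    # The template engine expects string values, so the number is converted explicitly.
    datastoreHintNumWorkers = tostring(var.hint_num_workers)
  }
}
```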

@@ -197,6 +197,7 @@ resource "google_dataflow_job" "generated" {
max_workers = var.max_workers
name = var.name
network = var.network
+ on_delete = var.on_delete
service_account_email = var.service_account_email
skip_wait_on_job_termination = var.skip_wait_on_job_termination
subnetwork = var.subnetwork
@@ -35,7 +35,7 @@ variable "region" {

variable "readQuery" {
type = string
description = "SQL query in standard SQL to pull data from BigQuery"
description = "A BigQuery SQL query that extracts data from the source. For example, `select * from dataset1.sample_table`."

}

@@ -47,37 +47,37 @@ variable "readIdColumn" {

variable "invalidOutputPath" {
type = string
description = "Cloud Storage path where to write BigQuery rows that cannot be converted to target entities. (Example: gs://your-bucket/your-path)"
description = "Cloud Storage path where to write BigQuery rows that cannot be converted to target entities. For example, `gs://your-bucket/your-path`"
default = null
}

variable "outputDirectory" {
type = string
description = "Cloud Storage directory to store output TFRecord files. (Example: gs://your-bucket/your-path)"
description = "The top-level Cloud Storage path prefix to use when writing the training, testing, and validation TFRecord files. Subdirectories for resulting training, testing, and validation TFRecord files are automatically generated from `outputDirectory`. For example, `gs://mybucket/output`"

}

variable "outputSuffix" {
type = string
description = "File suffix to append to TFRecord files. Defaults to .tfrecord"
description = "The file suffix for the training, testing, and validation TFRecord files that are written. The default value is `.tfrecord`."
default = null
}

variable "trainingPercentage" {
- type = string
- description = "Defaults to 1 or 100%. Should be decimal between 0 and 1 inclusive"
+ type = number
+ description = "The percentage of query data allocated to training TFRecord files. The default value is `1`, or `100%`."
default = null
}

variable "testingPercentage" {
- type = string
- description = "Defaults to 0 or 0%. Should be decimal between 0 and 1 inclusive"
+ type = number
+ description = "The percentage of query data allocated to testing TFRecord files. The default value is `0`, or `0%`."
default = null
}

variable "validationPercentage" {
- type = string
- description = "Defaults to 0 or 0%. Should be decimal between 0 and 1 inclusive"
+ type = number
+ description = "The percentage of query data allocated to validation TFRecord files. The default value is `0`, or `0%`."
default = null
}
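Since `trainingPercentage`, `testingPercentage`, and `validationPercentage` are now typed as numbers, a caller could also enforce the documented 0-to-1 range with a validation block. The sketch below is an assumed addition and is not part of the generated template, which leaves these defaults as `null`:

```hcl
# Assumed variant with range validation; the generated template omits this.
variable "trainingPercentage" {
  type        = number
  description = "The percentage of query data allocated to training TFRecord files."
  default     = 1

  validation {
    condition     = var.trainingPercentage >= 0 && var.trainingPercentage <= 1
    error_message = "The trainingPercentage value must be a decimal between 0 and 1 inclusive."
  }
}
```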

@@ -183,9 +183,9 @@ resource "google_dataflow_job" "generated" {
invalidOutputPath = var.invalidOutputPath
outputDirectory = var.outputDirectory
outputSuffix = var.outputSuffix
- trainingPercentage = var.trainingPercentage
- testingPercentage = var.testingPercentage
- validationPercentage = var.validationPercentage
+ trainingPercentage = tostring(var.trainingPercentage)
+ testingPercentage = tostring(var.testingPercentage)
+ validationPercentage = tostring(var.validationPercentage)
}

additional_experiments = var.additional_experiments
@@ -197,6 +197,7 @@ resource "google_dataflow_job" "generated" {
max_workers = var.max_workers
name = var.name
network = var.network
+ on_delete = var.on_delete
service_account_email = var.service_account_email
skip_wait_on_job_termination = var.skip_wait_on_job_termination
subnetwork = var.subnetwork
30 changes: 18 additions & 12 deletions v1/terraform/Cloud_Bigtable_to_GCS_Avro/dataflow_job.tf
@@ -35,36 +35,40 @@ variable "region" {

variable "bigtableProjectId" {
type = string
description = "The ID of the Google Cloud project of the Cloud Bigtable instance that you want to read data from"
description = "The ID of the Google Cloud project that contains the Bigtable instance that you want to read data from."

}

variable "bigtableInstanceId" {
type = string
description = "The ID of the Cloud Bigtable instance that contains the table"
description = "The ID of the Bigtable instance that contains the table."

}

variable "bigtableTableId" {
type = string
description = "The ID of the Cloud Bigtable table to read"
description = "The ID of the Bigtable table to export."

}

variable "outputDirectory" {
type = string
description = "The path and filename prefix for writing output files. Must end with a slash. DateTime formatting is used to parse directory path for date & time formatters. (Example: gs://your-bucket/your-path)"
description = "The Cloud Storage path where data is written. For example, `gs://mybucket/somefolder`"

}

variable "filenamePrefix" {
type = string
description = <<EOT
The prefix of the Avro file name. For example, "table1-". Defaults to: part.
EOT
description = "The prefix of the Avro filename. For example, `output-`. Defaults to: part."
default = "part"
}

variable "bigtableAppProfileId" {
type = string
description = "The ID of the Bigtable application profile to use for the export. If you don't specify an app profile, Bigtable uses the instance's default app profile: https://cloud.google.com/bigtable/docs/app-profiles#default-app-profile."
default = null
}


provider "google" {
project = var.project
@@ -162,11 +166,12 @@ resource "google_dataflow_job" "generated" {
provider = google
template_gcs_path = "gs://dataflow-templates-${var.region}/latest/Cloud_Bigtable_to_GCS_Avro"
parameters = {
- bigtableProjectId = var.bigtableProjectId
- bigtableInstanceId = var.bigtableInstanceId
- bigtableTableId = var.bigtableTableId
- outputDirectory = var.outputDirectory
- filenamePrefix = var.filenamePrefix
+ bigtableProjectId = var.bigtableProjectId
+ bigtableInstanceId = var.bigtableInstanceId
+ bigtableTableId = var.bigtableTableId
+ outputDirectory = var.outputDirectory
+ filenamePrefix = var.filenamePrefix
+ bigtableAppProfileId = var.bigtableAppProfileId
}

additional_experiments = var.additional_experiments
@@ -178,6 +183,7 @@ resource "google_dataflow_job" "generated" {
max_workers = var.max_workers
name = var.name
network = var.network
+ on_delete = var.on_delete
service_account_email = var.service_account_email
skip_wait_on_job_termination = var.skip_wait_on_job_termination
subnetwork = var.subnetwork
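To apply the regenerated Bigtable-to-Avro template with the new inputs, values can be supplied through a `terraform.tfvars` file; everything below is an illustrative assumption rather than content from this PR:

```hcl
# terraform.tfvars (illustrative values only)
project              = "my-project"
region               = "us-central1"
name                 = "bigtable-to-avro-export"
bigtableProjectId    = "my-project"
bigtableInstanceId   = "my-instance"
bigtableTableId      = "my-table"
outputDirectory      = "gs://mybucket/somefolder"
filenamePrefix       = "output-"
bigtableAppProfileId = "default"
on_delete            = "drain"
```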