1<html><body>
2<style>
3
4body, h1, h2, h3, div, span, p, pre, a {
5  margin: 0;
6  padding: 0;
7  border: 0;
8  font-weight: inherit;
9  font-style: inherit;
10  font-size: 100%;
11  font-family: inherit;
12  vertical-align: baseline;
13}
14
15body {
16  font-size: 13px;
17  padding: 1em;
18}
19
20h1 {
21  font-size: 26px;
22  margin-bottom: 1em;
23}
24
25h2 {
26  font-size: 24px;
27  margin-bottom: 1em;
28}
29
30h3 {
31  font-size: 20px;
32  margin-bottom: 1em;
33  margin-top: 1em;
34}
35
36pre, code {
37  line-height: 1.5;
38  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39}
40
41pre {
42  margin-top: 0.5em;
43}
44
45h1, h2, h3, p {
46  font-family: Arial, sans serif;
47}
48
49h1, h2, h3 {
50  border-bottom: solid #CCC 1px;
51}
52
53.toc_element {
54  margin-top: 0.5em;
55}
56
57.firstline {
58  margin-left: 2 em;
59}
60
61.method  {
62  margin-top: 1em;
63  border: solid 1px #CCC;
64  padding: 1em;
65  background: #EEE;
66}
67
68.details {
69  font-weight: bold;
70  font-size: 14px;
71}
72
73</style>
74
75<h1><a href="dataflow_v1b3.html">Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.templates.html">templates</a></h1>
76<h2>Instance Methods</h2>
77<p class="toc_element">
78  <code><a href="#create">create(projectId, location, body, x__xgafv=None)</a></code></p>
79<p class="firstline">Creates a Cloud Dataflow job from a template.</p>
80<p class="toc_element">
81  <code><a href="#get">get(projectId, location, gcsPath=None, x__xgafv=None, view=None)</a></code></p>
82<p class="firstline">Get the template associated with a template.</p>
83<p class="toc_element">
84  <code><a href="#launch">launch(projectId, location, body, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, gcsPath=None, validateOnly=None)</a></code></p>
85<p class="firstline">Launch a template.</p>
86<h3>Method Details</h3>
87<div class="method">
88    <code class="details" id="create">create(projectId, location, body, x__xgafv=None)</code>
89  <pre>Creates a Cloud Dataflow job from a template.
90
91Args:
92  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
93  location: string, The [regional endpoint]
94(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
95which to direct the request. (required)
96  body: object, The request body. (required)
97    The object takes the form of:
98
99{ # A request to create a Cloud Dataflow job from a template.
100    "environment": { # The environment values to set at runtime. # The runtime environment for the job.
101      "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
102          # template if not specified.
103      "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
104          # the service will use the network "default".
105      "zone": "A String", # The Compute Engine [availability
106          # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
107          # for launching worker instances to run your pipeline.
108      "additionalUserLabels": { # Additional user labels to be specified for the job.
109          # Keys and values should follow the restrictions specified in the [labeling
110          # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
111          # page.
112        "a_key": "A String",
113      },
114      "additionalExperiments": [ # Additional experiment flags for the job.
115        "A String",
116      ],
117      "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
118          # Use with caution.
119      "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
120          # Must be a valid Cloud Storage URL, beginning with `gs://`.
121      "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
122      "numWorkers": 42, # The initial number of Google Compute Engine instnaces for the job.
123      "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
124          # available to your pipeline during execution, from 1 to 1000.
125      "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
126          # the form "regions/REGION/subnetworks/SUBNETWORK".
127    },
128    "gcsPath": "A String", # Required. A Cloud Storage path to the template from which to
129        # create the job.
130        # Must be a valid Cloud Storage URL, beginning with `gs://`.
131    "location": "A String", # The [regional endpoint]
132        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
133        # which to direct the request.
134    "parameters": { # The runtime parameters to pass to the job.
135      "a_key": "A String",
136    },
137    "jobName": "A String", # Required. The job name to use for the created job.
138  }
139
140  x__xgafv: string, V1 error format.
141    Allowed values
142      1 - v1 error format
143      2 - v2 error format
144
145Returns:
146  An object of the form:
147
148    { # Defines a job to be run by the Cloud Dataflow service.
149    "labels": { # User-defined labels for this job.
150        #
151        # The labels map can contain no more than 64 entries.  Entries of the labels
152        # map are UTF8 strings that comply with the following restrictions:
153        #
154        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
155        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
156        # * Both keys and values are additionally constrained to be <= 128 bytes in
157        # size.
158      "a_key": "A String",
159    },
160    "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
161        # by the metadata values provided here. Populated for ListJobs and all GetJob
162        # views SUMMARY and higher.
163        # ListJob response and Job SUMMARY view.
164      "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
165        "versionDisplayName": "A String", # A readable string describing the version of the SDK.
166        "version": "A String", # The version of the SDK used to run the job.
167        "sdkSupportStatus": "A String", # The support status for this SDK version.
168      },
169      "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
170        { # Metadata for a PubSub connector used by the job.
171          "topic": "A String", # Topic accessed in the connection.
172          "subscription": "A String", # Subscription used in the connection.
173        },
174      ],
175      "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
176        { # Metadata for a Datastore connector used by the job.
177          "projectId": "A String", # ProjectId accessed in the connection.
178          "namespace": "A String", # Namespace used in the connection.
179        },
180      ],
181      "fileDetails": [ # Identification of a File source used in the Dataflow job.
182        { # Metadata for a File connector used by the job.
183          "filePattern": "A String", # File Pattern used to access files by the connector.
184        },
185      ],
186      "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
187        { # Metadata for a Spanner connector used by the job.
188          "instanceId": "A String", # InstanceId accessed in the connection.
189          "projectId": "A String", # ProjectId accessed in the connection.
190          "databaseId": "A String", # DatabaseId accessed in the connection.
191        },
192      ],
193      "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
194        { # Metadata for a BigTable connector used by the job.
195          "instanceId": "A String", # InstanceId accessed in the connection.
196          "projectId": "A String", # ProjectId accessed in the connection.
197          "tableId": "A String", # TableId accessed in the connection.
198        },
199      ],
200      "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
201        { # Metadata for a BigQuery connector used by the job.
202          "projectId": "A String", # Project accessed in the connection.
203          "dataset": "A String", # Dataset accessed in the connection.
204          "table": "A String", # Table accessed in the connection.
205          "query": "A String", # Query used to access data in the connection.
206        },
207      ],
208    },
209    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
210        # A description of the user pipeline and stages through which it is executed.
211        # Created by Cloud Dataflow service.  Only retrieved with
212        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
213        # form.  This data is provided by the Dataflow service for ease of visualizing
214        # the pipeline and interpreting Dataflow provided metrics.
215      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
216        { # Description of the type, names/ids, and input/outputs for a transform.
217          "kind": "A String", # Type of transform.
218          "name": "A String", # User provided name for this transform instance.
219          "inputCollectionName": [ # User names for all collection inputs to this transform.
220            "A String",
221          ],
222          "displayData": [ # Transform-specific display data.
223            { # Data provided with a pipeline or transform to provide descriptive info.
224              "shortStrValue": "A String", # A possible additional shorter value to display.
225                  # For example a java_class_name_value of com.mypackage.MyDoFn
226                  # will be stored with MyDoFn as the short_str_value and
227                  # com.mypackage.MyDoFn as the java_class_name value.
228                  # short_str_value can be displayed and java_class_name_value
229                  # will be displayed as a tooltip.
230              "durationValue": "A String", # Contains value if the data is of duration type.
231              "url": "A String", # An optional full URL.
232              "floatValue": 3.14, # Contains value if the data is of float type.
233              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
234                  # language namespace (i.e. python module) which defines the display data.
235                  # This allows a dax monitoring system to specially handle the data
236                  # and perform custom rendering.
237              "javaClassValue": "A String", # Contains value if the data is of java class type.
238              "label": "A String", # An optional label to display in a dax UI for the element.
239              "boolValue": True or False, # Contains value if the data is of a boolean type.
240              "strValue": "A String", # Contains value if the data is of string type.
241              "key": "A String", # The key identifying the display data.
242                  # This is intended to be used as a label for the display data
243                  # when viewed in a dax monitoring system.
244              "int64Value": "A String", # Contains value if the data is of int64 type.
245              "timestampValue": "A String", # Contains value if the data is of timestamp type.
246            },
247          ],
248          "outputCollectionName": [ # User  names for all collection outputs to this transform.
249            "A String",
250          ],
251          "id": "A String", # SDK generated id of this transform instance.
252        },
253      ],
254      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
255        { # Description of the composing transforms, names/ids, and input/outputs of a
256            # stage of execution.  Some composing transforms and sources may have been
257            # generated by the Dataflow service during execution planning.
258          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
259            { # Description of an interstitial value between transforms in an execution
260                # stage.
261              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
262              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
263                  # source is most closely associated.
264              "name": "A String", # Dataflow service generated name for this source.
265            },
266          ],
267          "kind": "A String", # Type of tranform this stage is executing.
268          "name": "A String", # Dataflow service generated name for this stage.
269          "outputSource": [ # Output sources for this stage.
270            { # Description of an input or output of an execution stage.
271              "userName": "A String", # Human-readable name for this source; may be user or system generated.
272              "sizeBytes": "A String", # Size of the source, if measurable.
273              "name": "A String", # Dataflow service generated name for this source.
274              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
275                  # source is most closely associated.
276            },
277          ],
278          "inputSource": [ # Input sources for this stage.
279            { # Description of an input or output of an execution stage.
280              "userName": "A String", # Human-readable name for this source; may be user or system generated.
281              "sizeBytes": "A String", # Size of the source, if measurable.
282              "name": "A String", # Dataflow service generated name for this source.
283              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
284                  # source is most closely associated.
285            },
286          ],
287          "componentTransform": [ # Transforms that comprise this execution stage.
288            { # Description of a transform executed as part of an execution stage.
289              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
290              "originalTransform": "A String", # User name for the original user transform with which this transform is
291                  # most closely associated.
292              "name": "A String", # Dataflow service generated name for this source.
293            },
294          ],
295          "id": "A String", # Dataflow service generated id for this stage.
296        },
297      ],
298      "displayData": [ # Pipeline level display data.
299        { # Data provided with a pipeline or transform to provide descriptive info.
300          "shortStrValue": "A String", # A possible additional shorter value to display.
301              # For example a java_class_name_value of com.mypackage.MyDoFn
302              # will be stored with MyDoFn as the short_str_value and
303              # com.mypackage.MyDoFn as the java_class_name value.
304              # short_str_value can be displayed and java_class_name_value
305              # will be displayed as a tooltip.
306          "durationValue": "A String", # Contains value if the data is of duration type.
307          "url": "A String", # An optional full URL.
308          "floatValue": 3.14, # Contains value if the data is of float type.
309          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
310              # language namespace (i.e. python module) which defines the display data.
311              # This allows a dax monitoring system to specially handle the data
312              # and perform custom rendering.
313          "javaClassValue": "A String", # Contains value if the data is of java class type.
314          "label": "A String", # An optional label to display in a dax UI for the element.
315          "boolValue": True or False, # Contains value if the data is of a boolean type.
316          "strValue": "A String", # Contains value if the data is of string type.
317          "key": "A String", # The key identifying the display data.
318              # This is intended to be used as a label for the display data
319              # when viewed in a dax monitoring system.
320          "int64Value": "A String", # Contains value if the data is of int64 type.
321          "timestampValue": "A String", # Contains value if the data is of timestamp type.
322        },
323      ],
324    },
325    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
326        # callers cannot mutate it.
327      { # A message describing the state of a particular execution stage.
328        "executionStageName": "A String", # The name of the execution stage.
329        "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
330        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
331      },
332    ],
333    "id": "A String", # The unique ID of this job.
334        #
335        # This field is set by the Cloud Dataflow service when the Job is
336        # created, and is immutable for the life of the job.
337    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
338        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
339    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
340    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
341        # corresponding name prefixes of the new job.
342      "a_key": "A String",
343    },
344    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
345      "version": { # A structure describing which components and their versions of the service
346          # are required in order to run the job.
347        "a_key": "", # Properties of the object.
348      },
349      "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
350      "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
351          # at rest, AKA a Customer Managed Encryption Key (CMEK).
352          #
353          # Format:
354          #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
355      "internalExperiments": { # Experimental settings.
356        "a_key": "", # Properties of the object. Contains field @type with type URL.
357      },
358      "dataset": "A String", # The dataset for the current project where various workflow
359          # related tables are stored.
360          #
361          # The supported resource type is:
362          #
363          # Google BigQuery:
364          #   bigquery.googleapis.com/{dataset}
365      "experiments": [ # The list of experiments to enable.
366        "A String",
367      ],
368      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
369      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
370          # options are passed through the service and are used to recreate the
371          # SDK pipeline options on the worker in a language agnostic and platform
372          # independent way.
373        "a_key": "", # Properties of the object.
374      },
375      "userAgent": { # A description of the process that generated the request.
376        "a_key": "", # Properties of the object.
377      },
378      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
379          # unspecified, the service will attempt to choose a reasonable
380          # default.  This should be in the form of the API service name,
381          # e.g. "compute.googleapis.com".
382      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
383          # specified in order for the job to have workers.
384        { # Describes one particular pool of Cloud Dataflow workers to be
385            # instantiated by the Cloud Dataflow service in order to perform the
386            # computations required by a job.  Note that a workflow job may use
387            # multiple pools, in order to match the various computational
388            # requirements of the various stages of the job.
389          "diskSourceImage": "A String", # Fully qualified source image for disks.
390          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
391              # using the standard Dataflow task runner.  Users should ignore
392              # this field.
393            "workflowFileName": "A String", # The file to store the workflow in.
394            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
395                # will not be uploaded.
396                #
397                # The supported resource type is:
398                #
399                # Google Cloud Storage:
400                #   storage.googleapis.com/{bucket}/{object}
401                #   bucket.storage.googleapis.com/{object}
402            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
403            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
404              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
405              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
406                  # "shuffle/v1beta1".
407              "workerId": "A String", # The ID of the worker running this pipeline.
408              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
409                  #
410                  # When workers access Google Cloud APIs, they logically do so via
411                  # relative URLs.  If this field is specified, it supplies the base
412                  # URL to use for resolving these relative URLs.  The normative
413                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
414                  # Locators".
415                  #
416                  # If not specified, the default value is "http://www.googleapis.com/"
417              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
418                  # "dataflow/v1b3/projects".
419              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
420                  # storage.
421                  #
422                  # The supported resource type is:
423                  #
424                  # Google Cloud Storage:
425                  #
426                  #   storage.googleapis.com/{bucket}/{object}
427                  #   bucket.storage.googleapis.com/{object}
428            },
429            "vmId": "A String", # The ID string of the VM.
430            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
431            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
432            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
433                # access the Cloud Dataflow API.
434              "A String",
435            ],
436            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
437                # taskrunner; e.g. "root".
438            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
439                #
440                # When workers access Google Cloud APIs, they logically do so via
441                # relative URLs.  If this field is specified, it supplies the base
442                # URL to use for resolving these relative URLs.  The normative
443                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
444                # Locators".
445                #
446                # If not specified, the default value is "http://www.googleapis.com/"
447            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
448                # taskrunner; e.g. "wheel".
449            "languageHint": "A String", # The suggested backend language.
450            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
451                # console.
452            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
453            "logDir": "A String", # The directory on the VM to store logs.
454            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
455            "harnessCommand": "A String", # The command to launch the worker harness.
456            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
457                # temporary storage.
458                #
459                # The supported resource type is:
460                #
461                # Google Cloud Storage:
462                #   storage.googleapis.com/{bucket}/{object}
463                #   bucket.storage.googleapis.com/{object}
464            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
465          },
466          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
467              # are supported.
468          "packages": [ # Packages to be installed on workers.
469            { # The packages that must be installed in order for a worker to run the
470                # steps of the Cloud Dataflow job that will be assigned to its worker
471                # pool.
472                #
473                # This is the mechanism by which the Cloud Dataflow SDK causes code to
474                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
475                # might use this to install jars containing the user's code and all of the
476                # various dependencies (libraries, data files, etc.) required in order
477                # for that code to run.
478              "location": "A String", # The resource to read the package from. The supported resource type is:
479                  #
480                  # Google Cloud Storage:
481                  #
482                  #   storage.googleapis.com/{bucket}
483                  #   bucket.storage.googleapis.com/
484              "name": "A String", # The name of the package.
485            },
486          ],
487          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
488              # service will attempt to choose a reasonable default.
489          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
490              # the service will use the network "default".
491          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
492              # will attempt to choose a reasonable default.
493          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
494              # attempt to choose a reasonable default.
495          "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
496              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
497              # `TEARDOWN_NEVER`.
498              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
499              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
500              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
501              # down.
502              #
503              # If the workers are not torn down by the service, they will
504              # continue to run and use Google Compute Engine VM resources in the
505              # user's project until they are explicitly terminated by the user.
506              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
507              # policy except for small, manually supervised test jobs.
508              #
509              # If unknown or unspecified, the service will attempt to choose a reasonable
510              # default.
511          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
512              # Compute Engine API.
513          "ipConfiguration": "A String", # Configuration for VM IPs.
514          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
515              # service will choose a number of threads (according to the number of cores
516              # on the selected machine type for batch, or 1 by convention for streaming).
517          "poolArgs": { # Extra arguments for this worker pool.
518            "a_key": "", # Properties of the object. Contains field @type with type URL.
519          },
520          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
521              # execute the job.  If zero or unspecified, the service will
522              # attempt to choose a reasonable default.
523          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
524              # harness, residing in Google Container Registry.
525          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
526              # the form "regions/REGION/subnetworks/SUBNETWORK".
527          "dataDisks": [ # Data disks that are used by a VM in this workflow.
528            { # Describes the data disk used by a workflow job.
529              "mountPoint": "A String", # Directory in a VM where disk is mounted.
530              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
531                  # attempt to choose a reasonable default.
532              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
533                  # must be a disk type appropriate to the project and zone in which
534                  # the workers will run.  If unknown or unspecified, the service
535                  # will attempt to choose a reasonable default.
536                  #
537                  # For example, the standard persistent disk type is a resource name
538                  # typically ending in "pd-standard".  If SSD persistent disks are
539                  # available, the resource name typically ends with "pd-ssd".  The
540                  # actual valid values are defined the Google Compute Engine API,
541                  # not by the Cloud Dataflow API; consult the Google Compute Engine
542                  # documentation for more information about determining the set of
543                  # available disk types for a particular project and zone.
544                  #
545                  # Google Compute Engine Disk types are local to a particular
546                  # project in a particular zone, and so the resource name will
547                  # typically look something like this:
548                  #
549                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
550            },
551          ],
552          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
553            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
554            "algorithm": "A String", # The algorithm to use for autoscaling.
555          },
556          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
557              # select a default set of packages which are useful to worker
558              # harnesses written in a particular language.
559          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
560              # attempt to choose a reasonable default.
561          "metadata": { # Metadata to set on the Google Compute Engine VMs.
562            "a_key": "A String",
563          },
564        },
565      ],
566      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
567          # storage.  The system will append the suffix "/temp-{JOBNAME} to
568          # this resource prefix, where {JOBNAME} is the value of the
569          # job_name field.  The resulting bucket and object prefix is used
570          # as the prefix of the resources used to store temporary data
571          # needed during the job execution.  NOTE: This will override the
572          # value in taskrunner_settings.
573          # The supported resource type is:
574          #
575          # Google Cloud Storage:
576          #
577          #   storage.googleapis.com/{bucket}/{object}
578          #   bucket.storage.googleapis.com/{object}
579    },
580    "location": "A String", # The [regional endpoint]
581        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
582        # contains this job.
583    "tempFiles": [ # A set of files the system should be aware of that are used
584        # for temporary storage. These temporary files will be
585        # removed on job completion.
586        # No duplicates are allowed.
587        # No file patterns are supported.
588        #
589        # The supported files are:
590        #
591        # Google Cloud Storage:
592        #
593        #    storage.googleapis.com/{bucket}/{object}
594        #    bucket.storage.googleapis.com/{object}
595      "A String",
596    ],
597    "type": "A String", # The type of Cloud Dataflow job.
598    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
599        # If this field is set, the service will ensure its uniqueness.
600        # The request to create a job will fail if the service has knowledge of a
601        # previously submitted job with the same client's ID and job name.
602        # The caller may use this field to ensure idempotence of job
603        # creation across retried attempts to create a job.
604        # By default, the field is empty and, in that case, the service ignores it.
605    "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
606        # snapshot.
607    "stepsLocation": "A String", # The GCS location where the steps are stored.
608    "currentStateTime": "A String", # The timestamp associated with the current state.
609    "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
610        # Flexible resource scheduling jobs are started with some delay after job
611        # creation, so start_time is unset before start and is updated when the
612        # job is started by the Cloud Dataflow service. For other jobs, start_time
613        # always equals to create_time and is immutable and set by the Cloud Dataflow
614        # service.
615    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
616        # Cloud Dataflow service.
617    "requestedState": "A String", # The job's requested state.
618        #
619        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
620        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
621        # also be used to directly set a job's requested state to
622        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
623        # job if it has not already reached a terminal state.
624    "name": "A String", # The user-specified Cloud Dataflow job name.
625        #
626        # Only one Job with a given name may exist in a project at any
627        # given time. If a caller attempts to create a Job with the same
628        # name as an already-existing Job, the attempt returns the
629        # existing Job.
630        #
631        # The name must match the regular expression
632        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
633    "steps": [ # Exactly one of step or steps_location should be specified.
634        #
635        # The top-level steps that constitute the entire job.
636      { # Defines a particular step within a Cloud Dataflow job.
637          #
638          # A job consists of multiple steps, each of which performs some
639          # specific operation as part of the overall job.  Data is typically
640          # passed from one step to another as part of the job.
641          #
642          # Here's an example of a sequence of steps which together implement a
643          # Map-Reduce job:
644          #
645          #   * Read a collection of data from some source, parsing the
646          #     collection's elements.
647          #
648          #   * Validate the elements.
649          #
650          #   * Apply a user-defined function to map each element to some value
651          #     and extract an element-specific key value.
652          #
653          #   * Group elements with the same key into a single element with
654          #     that key, transforming a multiply-keyed collection into a
655          #     uniquely-keyed collection.
656          #
657          #   * Write the elements out to some data sink.
658          #
659          # Note that the Cloud Dataflow service may be used to run many different
660          # types of jobs, not just Map-Reduce.
661        "kind": "A String", # The kind of step in the Cloud Dataflow job.
662        "properties": { # Named properties associated with the step. Each kind of
663            # predefined step has its own required set of properties.
664            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
665          "a_key": "", # Properties of the object.
666        },
667        "name": "A String", # The name that identifies the step. This must be unique for each
668            # step with respect to all other steps in the Cloud Dataflow job.
669      },
670    ],
671    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
672        # of the job it replaced.
673        #
674        # When sending a `CreateJobRequest`, you can update a job by specifying it
675        # here. The job named here is stopped, and its intermediate state is
676        # transferred to this job.
677    "currentState": "A String", # The current state of the job.
678        #
679        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
680        # specified.
681        #
682        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
683        # terminal state. After a job has reached a terminal state, no
684        # further state updates may be made.
685        #
686        # This field may be mutated by the Cloud Dataflow service;
687        # callers cannot mutate it.
688    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
689        # isn't contained in the submitted job.
690      "stages": { # A mapping from each stage to the information about that stage.
691        "a_key": { # Contains information about how a particular
692            # google.dataflow.v1beta3.Step will be executed.
693          "stepName": [ # The steps associated with the execution stage.
694              # Note that stages may have several steps, and that a given step
695              # might be run by more than one stage.
696            "A String",
697          ],
698        },
699      },
700    },
701  }</pre>
702</div>
703
704<div class="method">
705    <code class="details" id="get">get(projectId, location, gcsPath=None, x__xgafv=None, view=None)</code>
706  <pre>Get the template associated with a template.
707
708Args:
709  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
710  location: string, The [regional endpoint]
711(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
712which to direct the request. (required)
713  gcsPath: string, Required. A Cloud Storage path to the template from which to
714create the job.
715Must be valid Cloud Storage URL, beginning with 'gs://'.
716  x__xgafv: string, V1 error format.
717    Allowed values
718      1 - v1 error format
719      2 - v2 error format
720  view: string, The view to retrieve. Defaults to METADATA_ONLY.
721
722Returns:
723  An object of the form:
724
725    { # The response to a GetTemplate request.
726    "status": { # The `Status` type defines a logical error model that is suitable for # The status of the get template request. Any problems with the
727        # request will be indicated in the error_details.
728        # different programming environments, including REST APIs and RPC APIs. It is
729        # used by [gRPC](https://github.com/grpc). The error model is designed to be:
730        #
731        # - Simple to use and understand for most users
732        # - Flexible enough to meet unexpected needs
733        #
734        # # Overview
735        #
736        # The `Status` message contains three pieces of data: error code, error
737        # message, and error details. The error code should be an enum value of
738        # google.rpc.Code, but it may accept additional error codes if needed.  The
739        # error message should be a developer-facing English message that helps
740        # developers *understand* and *resolve* the error. If a localized user-facing
741        # error message is needed, put the localized message in the error details or
742        # localize it in the client. The optional error details may contain arbitrary
743        # information about the error. There is a predefined set of error detail types
744        # in the package `google.rpc` that can be used for common error conditions.
745        #
746        # # Language mapping
747        #
748        # The `Status` message is the logical representation of the error model, but it
749        # is not necessarily the actual wire format. When the `Status` message is
750        # exposed in different client libraries and different wire protocols, it can be
751        # mapped differently. For example, it will likely be mapped to some exceptions
752        # in Java, but more likely mapped to some error codes in C.
753        #
754        # # Other uses
755        #
756        # The error model and the `Status` message can be used in a variety of
757        # environments, either with or without APIs, to provide a
758        # consistent developer experience across different environments.
759        #
760        # Example uses of this error model include:
761        #
762        # - Partial errors. If a service needs to return partial errors to the client,
763        #     it may embed the `Status` in the normal response to indicate the partial
764        #     errors.
765        #
766        # - Workflow errors. A typical workflow has multiple steps. Each step may
767        #     have a `Status` message for error reporting.
768        #
769        # - Batch operations. If a client uses batch request and batch response, the
770        #     `Status` message should be used directly inside batch response, one for
771        #     each error sub-response.
772        #
773        # - Asynchronous operations. If an API call embeds asynchronous operation
774        #     results in its response, the status of those operations should be
775        #     represented directly using the `Status` message.
776        #
777        # - Logging. If some API errors are stored in logs, the message `Status` could
778        #     be used directly after any stripping needed for security/privacy reasons.
779      "message": "A String", # A developer-facing error message, which should be in English. Any
780          # user-facing error message should be localized and sent in the
781          # google.rpc.Status.details field, or localized by the client.
782      "code": 42, # The status code, which should be an enum value of google.rpc.Code.
783      "details": [ # A list of messages that carry the error details.  There is a common set of
784          # message types for APIs to use.
785        {
786          "a_key": "", # Properties of the object. Contains field @type with type URL.
787        },
788      ],
789    },
790    "metadata": { # Metadata describing a template. # The template metadata describing the template name, available
791        # parameters, etc.
792      "name": "A String", # Required. The name of the template.
793      "parameters": [ # The parameters for the template.
794        { # Metadata for a specific parameter.
795          "regexes": [ # Optional. Regexes that the parameter must match.
796            "A String",
797          ],
798          "helpText": "A String", # Required. The help text to display for the parameter.
799          "name": "A String", # Required. The name of the parameter.
800          "isOptional": True or False, # Optional. Whether the parameter is optional. Defaults to false.
801          "label": "A String", # Required. The label to display for the parameter.
802        },
803      ],
804      "description": "A String", # Optional. A description of the template.
805    },
806  }</pre>
807</div>
808
809<div class="method">
810    <code class="details" id="launch">launch(projectId, location, body, dynamicTemplate_gcsPath=None, x__xgafv=None, dynamicTemplate_stagingLocation=None, gcsPath=None, validateOnly=None)</code>
811  <pre>Launch a template.
812
813Args:
814  projectId: string, Required. The ID of the Cloud Platform project that the job belongs to. (required)
815  location: string, The [regional endpoint]
816(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) to
817which to direct the request. (required)
818  body: object, The request body. (required)
819    The object takes the form of:
820
821{ # Parameters to provide to the template being launched.
822    "environment": { # The environment values to set at runtime. # The runtime environment for the job.
823      "machineType": "A String", # The machine type to use for the job. Defaults to the value from the
824          # template if not specified.
825      "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
826          # the service will use the network "default".
827      "zone": "A String", # The Compute Engine [availability
828          # zone](https://cloud.google.com/compute/docs/regions-zones/regions-zones)
829          # for launching worker instances to run your pipeline.
830      "additionalUserLabels": { # Additional user labels to be specified for the job.
831          # Keys and values should follow the restrictions specified in the [labeling
832          # restrictions](https://cloud.google.com/compute/docs/labeling-resources#restrictions)
833          # page.
834        "a_key": "A String",
835      },
836      "additionalExperiments": [ # Additional experiment flags for the job.
837        "A String",
838      ],
839      "bypassTempDirValidation": True or False, # Whether to bypass the safety checks for the job's temporary directory.
840          # Use with caution.
841      "tempLocation": "A String", # The Cloud Storage path to use for temporary files.
842          # Must be a valid Cloud Storage URL, beginning with `gs://`.
843      "serviceAccountEmail": "A String", # The email address of the service account to run the job as.
844      "numWorkers": 42, # The initial number of Google Compute Engine instnaces for the job.
845      "maxWorkers": 42, # The maximum number of Google Compute Engine instances to be made
846          # available to your pipeline during execution, from 1 to 1000.
847      "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
848          # the form "regions/REGION/subnetworks/SUBNETWORK".
849    },
850    "parameters": { # The runtime parameters to pass to the job.
851      "a_key": "A String",
852    },
853    "jobName": "A String", # Required. The job name to use for the created job.
854  }
855
856  dynamicTemplate_gcsPath: string, Path to dynamic template spec file on GCS.
857The file must be a Json serialized DynamicTemplateFieSpec object.
858  x__xgafv: string, V1 error format.
859    Allowed values
860      1 - v1 error format
861      2 - v2 error format
862  dynamicTemplate_stagingLocation: string, Cloud Storage path for staging dependencies.
863Must be a valid Cloud Storage URL, beginning with `gs://`.
864  gcsPath: string, A Cloud Storage path to the template from which to create
865the job.
866Must be valid Cloud Storage URL, beginning with 'gs://'.
867  validateOnly: boolean, If true, the request is validated but not actually executed.
868Defaults to false.
869
870Returns:
871  An object of the form:
872
873    { # Response to the request to launch a template.
874    "job": { # Defines a job to be run by the Cloud Dataflow service. # The job that was launched, if the request was not a dry run and
875        # the job was successfully launched.
876      "labels": { # User-defined labels for this job.
877          #
878          # The labels map can contain no more than 64 entries.  Entries of the labels
879          # map are UTF8 strings that comply with the following restrictions:
880          #
881          # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
882          # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
883          # * Both keys and values are additionally constrained to be <= 128 bytes in
884          # size.
885        "a_key": "A String",
886      },
887      "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
888          # by the metadata values provided here. Populated for ListJobs and all GetJob
889          # views SUMMARY and higher.
890          # ListJob response and Job SUMMARY view.
891        "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
892          "versionDisplayName": "A String", # A readable string describing the version of the SDK.
893          "version": "A String", # The version of the SDK used to run the job.
894          "sdkSupportStatus": "A String", # The support status for this SDK version.
895        },
896        "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
897          { # Metadata for a PubSub connector used by the job.
898            "topic": "A String", # Topic accessed in the connection.
899            "subscription": "A String", # Subscription used in the connection.
900          },
901        ],
902        "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
903          { # Metadata for a Datastore connector used by the job.
904            "projectId": "A String", # ProjectId accessed in the connection.
905            "namespace": "A String", # Namespace used in the connection.
906          },
907        ],
908        "fileDetails": [ # Identification of a File source used in the Dataflow job.
909          { # Metadata for a File connector used by the job.
910            "filePattern": "A String", # File Pattern used to access files by the connector.
911          },
912        ],
913        "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
914          { # Metadata for a Spanner connector used by the job.
915            "instanceId": "A String", # InstanceId accessed in the connection.
916            "projectId": "A String", # ProjectId accessed in the connection.
917            "databaseId": "A String", # DatabaseId accessed in the connection.
918          },
919        ],
920        "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
921          { # Metadata for a BigTable connector used by the job.
922            "instanceId": "A String", # InstanceId accessed in the connection.
923            "projectId": "A String", # ProjectId accessed in the connection.
924            "tableId": "A String", # TableId accessed in the connection.
925          },
926        ],
927        "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
928          { # Metadata for a BigQuery connector used by the job.
929            "projectId": "A String", # Project accessed in the connection.
930            "dataset": "A String", # Dataset accessed in the connection.
931            "table": "A String", # Table accessed in the connection.
932            "query": "A String", # Query used to access data in the connection.
933          },
934        ],
935      },
936      "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
937          # A description of the user pipeline and stages through which it is executed.
938          # Created by Cloud Dataflow service.  Only retrieved with
939          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
940          # form.  This data is provided by the Dataflow service for ease of visualizing
941          # the pipeline and interpreting Dataflow provided metrics.
942        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
943          { # Description of the type, names/ids, and input/outputs for a transform.
944            "kind": "A String", # Type of transform.
945            "name": "A String", # User provided name for this transform instance.
946            "inputCollectionName": [ # User names for all collection inputs to this transform.
947              "A String",
948            ],
949            "displayData": [ # Transform-specific display data.
950              { # Data provided with a pipeline or transform to provide descriptive info.
951                "shortStrValue": "A String", # A possible additional shorter value to display.
952                    # For example a java_class_name_value of com.mypackage.MyDoFn
953                    # will be stored with MyDoFn as the short_str_value and
954                    # com.mypackage.MyDoFn as the java_class_name value.
955                    # short_str_value can be displayed and java_class_name_value
956                    # will be displayed as a tooltip.
957                "durationValue": "A String", # Contains value if the data is of duration type.
958                "url": "A String", # An optional full URL.
959                "floatValue": 3.14, # Contains value if the data is of float type.
960                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
961                    # language namespace (i.e. python module) which defines the display data.
962                    # This allows a dax monitoring system to specially handle the data
963                    # and perform custom rendering.
964                "javaClassValue": "A String", # Contains value if the data is of java class type.
965                "label": "A String", # An optional label to display in a dax UI for the element.
966                "boolValue": True or False, # Contains value if the data is of a boolean type.
967                "strValue": "A String", # Contains value if the data is of string type.
968                "key": "A String", # The key identifying the display data.
969                    # This is intended to be used as a label for the display data
970                    # when viewed in a dax monitoring system.
971                "int64Value": "A String", # Contains value if the data is of int64 type.
972                "timestampValue": "A String", # Contains value if the data is of timestamp type.
973              },
974            ],
975            "outputCollectionName": [ # User names for all collection outputs to this transform.
976              "A String",
977            ],
978            "id": "A String", # SDK generated id of this transform instance.
979          },
980        ],
981        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
982          { # Description of the composing transforms, names/ids, and input/outputs of a
983              # stage of execution.  Some composing transforms and sources may have been
984              # generated by the Dataflow service during execution planning.
985            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
986              { # Description of an interstitial value between transforms in an execution
987                  # stage.
988                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
989                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
990                    # source is most closely associated.
991                "name": "A String", # Dataflow service generated name for this source.
992              },
993            ],
994            "kind": "A String", # Type of transform this stage is executing.
995            "name": "A String", # Dataflow service generated name for this stage.
996            "outputSource": [ # Output sources for this stage.
997              { # Description of an input or output of an execution stage.
998                "userName": "A String", # Human-readable name for this source; may be user or system generated.
999                "sizeBytes": "A String", # Size of the source, if measurable.
1000                "name": "A String", # Dataflow service generated name for this source.
1001                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1002                    # source is most closely associated.
1003              },
1004            ],
1005            "inputSource": [ # Input sources for this stage.
1006              { # Description of an input or output of an execution stage.
1007                "userName": "A String", # Human-readable name for this source; may be user or system generated.
1008                "sizeBytes": "A String", # Size of the source, if measurable.
1009                "name": "A String", # Dataflow service generated name for this source.
1010                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1011                    # source is most closely associated.
1012              },
1013            ],
1014            "componentTransform": [ # Transforms that comprise this execution stage.
1015              { # Description of a transform executed as part of an execution stage.
1016                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1017                "originalTransform": "A String", # User name for the original user transform with which this transform is
1018                    # most closely associated.
1019                "name": "A String", # Dataflow service generated name for this transform.
1020              },
1021            ],
1022            "id": "A String", # Dataflow service generated id for this stage.
1023          },
1024        ],
1025        "displayData": [ # Pipeline level display data.
1026          { # Data provided with a pipeline or transform to provide descriptive info.
1027            "shortStrValue": "A String", # A possible additional shorter value to display.
1028                # For example a java_class_name_value of com.mypackage.MyDoFn
1029                # will be stored with MyDoFn as the short_str_value and
1030                # com.mypackage.MyDoFn as the java_class_name value.
1031                # short_str_value can be displayed and java_class_name_value
1032                # will be displayed as a tooltip.
1033            "durationValue": "A String", # Contains value if the data is of duration type.
1034            "url": "A String", # An optional full URL.
1035            "floatValue": 3.14, # Contains value if the data is of float type.
1036            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1037                # language namespace (e.g. a Python module) which defines the display data.
1038                # This allows a dax monitoring system to specially handle the data
1039                # and perform custom rendering.
1040            "javaClassValue": "A String", # Contains value if the data is of java class type.
1041            "label": "A String", # An optional label to display in a dax UI for the element.
1042            "boolValue": True or False, # Contains value if the data is of a boolean type.
1043            "strValue": "A String", # Contains value if the data is of string type.
1044            "key": "A String", # The key identifying the display data.
1045                # This is intended to be used as a label for the display data
1046                # when viewed in a dax monitoring system.
1047            "int64Value": "A String", # Contains value if the data is of int64 type.
1048            "timestampValue": "A String", # Contains value if the data is of timestamp type.
1049          },
1050        ],
1051      },
1052      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1053          # callers cannot mutate it.
1054        { # A message describing the state of a particular execution stage.
1055          "executionStageName": "A String", # The name of the execution stage.
1056          "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
1057          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1058        },
1059      ],
1060      "id": "A String", # The unique ID of this job.
1061          #
1062          # This field is set by the Cloud Dataflow service when the Job is
1063          # created, and is immutable for the life of the job.
1064      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1065          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1066      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1067      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1068          # corresponding name prefixes of the new job.
1069        "a_key": "A String",
1070      },
1071      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1072        "version": { # A structure describing which components and their versions of the service
1073            # are required in order to run the job.
1074          "a_key": "", # Properties of the object.
1075        },
1076        "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
1077        "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
1078            # at rest, AKA a Customer Managed Encryption Key (CMEK).
1079            #
1080            # Format:
1081            #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
1082        "internalExperiments": { # Experimental settings.
1083          "a_key": "", # Properties of the object. Contains field @type with type URL.
1084        },
1085        "dataset": "A String", # The dataset for the current project where various workflow
1086            # related tables are stored.
1087            #
1088            # The supported resource type is:
1089            #
1090            # Google BigQuery:
1091            #   bigquery.googleapis.com/{dataset}
1092        "experiments": [ # The list of experiments to enable.
1093          "A String",
1094        ],
1095        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1096        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1097            # options are passed through the service and are used to recreate the
1098            # SDK pipeline options on the worker in a language agnostic and platform
1099            # independent way.
1100          "a_key": "", # Properties of the object.
1101        },
1102        "userAgent": { # A description of the process that generated the request.
1103          "a_key": "", # Properties of the object.
1104        },
1105        "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
1106            # unspecified, the service will attempt to choose a reasonable
1107            # default.  This should be in the form of the API service name,
1108            # e.g. "compute.googleapis.com".
1109        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1110            # specified in order for the job to have workers.
1111          { # Describes one particular pool of Cloud Dataflow workers to be
1112              # instantiated by the Cloud Dataflow service in order to perform the
1113              # computations required by a job.  Note that a workflow job may use
1114              # multiple pools, in order to match the various computational
1115              # requirements of the various stages of the job.
1116            "diskSourceImage": "A String", # Fully qualified source image for disks.
1117            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1118                # using the standard Dataflow task runner.  Users should ignore
1119                # this field.
1120              "workflowFileName": "A String", # The file to store the workflow in.
1121              "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
1122                  # will not be uploaded.
1123                  #
1124                  # The supported resource type is:
1125                  #
1126                  # Google Cloud Storage:
1127                  #   storage.googleapis.com/{bucket}/{object}
1128                  #   bucket.storage.googleapis.com/{object}
1129              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1130              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1131                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1132                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1133                    # "shuffle/v1beta1".
1134                "workerId": "A String", # The ID of the worker running this pipeline.
1135                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1136                    #
1137                    # When workers access Google Cloud APIs, they logically do so via
1138                    # relative URLs.  If this field is specified, it supplies the base
1139                    # URL to use for resolving these relative URLs.  The normative
1140                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1141                    # Locators".
1142                    #
1143                    # If not specified, the default value is "http://www.googleapis.com/"
1144                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1145                    # "dataflow/v1b3/projects".
1146                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1147                    # storage.
1148                    #
1149                    # The supported resource type is:
1150                    #
1151                    # Google Cloud Storage:
1152                    #
1153                    #   storage.googleapis.com/{bucket}/{object}
1154                    #   bucket.storage.googleapis.com/{object}
1155              },
1156              "vmId": "A String", # The ID string of the VM.
1157              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1158              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1159              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1160                  # access the Cloud Dataflow API.
1161                "A String",
1162              ],
1163              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1164                  # taskrunner; e.g. "root".
1165              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1166                  #
1167                  # When workers access Google Cloud APIs, they logically do so via
1168                  # relative URLs.  If this field is specified, it supplies the base
1169                  # URL to use for resolving these relative URLs.  The normative
1170                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1171                  # Locators".
1172                  #
1173                  # If not specified, the default value is "http://www.googleapis.com/"
1174              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1175                  # taskrunner; e.g. "wheel".
1176              "languageHint": "A String", # The suggested backend language.
1177              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1178                  # console.
1179              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1180              "logDir": "A String", # The directory on the VM to store logs.
1181              "dataflowApiVersion": "A String", # The API version of the endpoint, e.g. "v1b3".
1182              "harnessCommand": "A String", # The command to launch the worker harness.
1183              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1184                  # temporary storage.
1185                  #
1186                  # The supported resource type is:
1187                  #
1188                  # Google Cloud Storage:
1189                  #   storage.googleapis.com/{bucket}/{object}
1190                  #   bucket.storage.googleapis.com/{object}
1191              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1192            },
1193            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1194                # are supported.
1195            "packages": [ # Packages to be installed on workers.
1196              { # The packages that must be installed in order for a worker to run the
1197                  # steps of the Cloud Dataflow job that will be assigned to its worker
1198                  # pool.
1199                  #
1200                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
1201                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1202                  # might use this to install jars containing the user's code and all of the
1203                  # various dependencies (libraries, data files, etc.) required in order
1204                  # for that code to run.
1205                "location": "A String", # The resource to read the package from. The supported resource type is:
1206                    #
1207                    # Google Cloud Storage:
1208                    #
1209                    #   storage.googleapis.com/{bucket}
1210                    #   bucket.storage.googleapis.com/
1211                "name": "A String", # The name of the package.
1212              },
1213            ],
1214            "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
1215                # service will attempt to choose a reasonable default.
1216            "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
1217                # the service will use the network "default".
1218            "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
1219                # will attempt to choose a reasonable default.
1220            "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
1221                # attempt to choose a reasonable default.
1222            "teardownPolicy": "A String", # Sets the policy for determining when to turn down the worker pool.
1223                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1224                # `TEARDOWN_NEVER`.
1225                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1226                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1227                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1228                # down.
1229                #
1230                # If the workers are not torn down by the service, they will
1231                # continue to run and use Google Compute Engine VM resources in the
1232                # user's project until they are explicitly terminated by the user.
1233                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1234                # policy except for small, manually supervised test jobs.
1235                #
1236                # If unknown or unspecified, the service will attempt to choose a reasonable
1237                # default.
1238            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1239                # Compute Engine API.
1240            "ipConfiguration": "A String", # Configuration for VM IPs.
1241            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1242                # service will choose a number of threads (according to the number of cores
1243                # on the selected machine type for batch, or 1 by convention for streaming).
1244            "poolArgs": { # Extra arguments for this worker pool.
1245              "a_key": "", # Properties of the object. Contains field @type with type URL.
1246            },
1247            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1248                # execute the job.  If zero or unspecified, the service will
1249                # attempt to choose a reasonable default.
1250            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1251                # harness, residing in Google Container Registry.
1252            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
1253                # the form "regions/REGION/subnetworks/SUBNETWORK".
1254            "dataDisks": [ # Data disks that are used by a VM in this workflow.
1255              { # Describes the data disk used by a workflow job.
1256                "mountPoint": "A String", # Directory in a VM where disk is mounted.
1257                "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
1258                    # attempt to choose a reasonable default.
1259                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
1260                    # must be a disk type appropriate to the project and zone in which
1261                    # the workers will run.  If unknown or unspecified, the service
1262                    # will attempt to choose a reasonable default.
1263                    #
1264                    # For example, the standard persistent disk type is a resource name
1265                    # typically ending in "pd-standard".  If SSD persistent disks are
1266                    # available, the resource name typically ends with "pd-ssd".  The
1267                    # actual valid values are defined by the Google Compute Engine API,
1268                    # not by the Cloud Dataflow API; consult the Google Compute Engine
1269                    # documentation for more information about determining the set of
1270                    # available disk types for a particular project and zone.
1271                    #
1272                    # Google Compute Engine Disk types are local to a particular
1273                    # project in a particular zone, and so the resource name will
1274                    # typically look something like this:
1275                    #
1276                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1277              },
1278            ],
1279            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1280              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1281              "algorithm": "A String", # The algorithm to use for autoscaling.
1282            },
1283            "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
1284                # select a default set of packages which are useful to worker
1285                # harnesses written in a particular language.
1286            "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
1287                # attempt to choose a reasonable default.
1288            "metadata": { # Metadata to set on the Google Compute Engine VMs.
1289              "a_key": "A String",
1290            },
1291          },
1292        ],
1293        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1294            # storage.  The system will append the suffix "/temp-{JOBNAME}" to
1295            # this resource prefix, where {JOBNAME} is the value of the
1296            # job_name field.  The resulting bucket and object prefix is used
1297            # as the prefix of the resources used to store temporary data
1298            # needed during the job execution.  NOTE: This will override the
1299            # value in taskrunner_settings.
1300            # The supported resource type is:
1301            #
1302            # Google Cloud Storage:
1303            #
1304            #   storage.googleapis.com/{bucket}/{object}
1305            #   bucket.storage.googleapis.com/{object}
1306      },
1307      "location": "A String", # The [regional endpoint]
1308          # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1309          # contains this job.
1310      "tempFiles": [ # A set of files the system should be aware of that are used
1311          # for temporary storage. These temporary files will be
1312          # removed on job completion.
1313          # No duplicates are allowed.
1314          # No file patterns are supported.
1315          #
1316          # The supported files are:
1317          #
1318          # Google Cloud Storage:
1319          #
1320          #    storage.googleapis.com/{bucket}/{object}
1321          #    bucket.storage.googleapis.com/{object}
1322        "A String",
1323      ],
1324      "type": "A String", # The type of Cloud Dataflow job.
1325      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1326          # If this field is set, the service will ensure its uniqueness.
1327          # The request to create a job will fail if the service has knowledge of a
1328          # previously submitted job with the same client's ID and job name.
1329          # The caller may use this field to ensure idempotence of job
1330          # creation across retried attempts to create a job.
1331          # By default, the field is empty and, in that case, the service ignores it.
1332      "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
1333          # snapshot.
1334      "stepsLocation": "A String", # The GCS location where the steps are stored.
1335      "currentStateTime": "A String", # The timestamp associated with the current state.
1336      "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
1337          # Flexible resource scheduling jobs are started with some delay after job
1338          # creation, so start_time is unset before start and is updated when the
1339          # job is started by the Cloud Dataflow service. For other jobs, start_time
1340          # always equals create_time and is immutable and set by the Cloud Dataflow
1341          # service.
1342      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1343          # Cloud Dataflow service.
1344      "requestedState": "A String", # The job's requested state.
1345          #
1346          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1347          # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
1348          # also be used to directly set a job's requested state to
1349          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1350          # job if it has not already reached a terminal state.
1351      "name": "A String", # The user-specified Cloud Dataflow job name.
1352          #
1353          # Only one Job with a given name may exist in a project at any
1354          # given time. If a caller attempts to create a Job with the same
1355          # name as an already-existing Job, the attempt returns the
1356          # existing Job.
1357          #
1358          # The name must match the regular expression
1359          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1360      "steps": [ # Exactly one of step or steps_location should be specified.
1361          #
1362          # The top-level steps that constitute the entire job.
1363        { # Defines a particular step within a Cloud Dataflow job.
1364            #
1365            # A job consists of multiple steps, each of which performs some
1366            # specific operation as part of the overall job.  Data is typically
1367            # passed from one step to another as part of the job.
1368            #
1369            # Here's an example of a sequence of steps which together implement a
1370            # Map-Reduce job:
1371            #
1372            #   * Read a collection of data from some source, parsing the
1373            #     collection's elements.
1374            #
1375            #   * Validate the elements.
1376            #
1377            #   * Apply a user-defined function to map each element to some value
1378            #     and extract an element-specific key value.
1379            #
1380            #   * Group elements with the same key into a single element with
1381            #     that key, transforming a multiply-keyed collection into a
1382            #     uniquely-keyed collection.
1383            #
1384            #   * Write the elements out to some data sink.
1385            #
1386            # Note that the Cloud Dataflow service may be used to run many different
1387            # types of jobs, not just Map-Reduce.
1388          "kind": "A String", # The kind of step in the Cloud Dataflow job.
1389          "properties": { # Named properties associated with the step. Each kind of
1390              # predefined step has its own required set of properties.
1391              # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
1392            "a_key": "", # Properties of the object.
1393          },
1394          "name": "A String", # The name that identifies the step. This must be unique for each
1395              # step with respect to all other steps in the Cloud Dataflow job.
1396        },
1397      ],
1398      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1399          # of the job it replaced.
1400          #
1401          # When sending a `CreateJobRequest`, you can update a job by specifying it
1402          # here. The job named here is stopped, and its intermediate state is
1403          # transferred to this job.
1404      "currentState": "A String", # The current state of the job.
1405          #
1406          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1407          # specified.
1408          #
1409          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1410          # terminal state. After a job has reached a terminal state, no
1411          # further state updates may be made.
1412          #
1413          # This field may be mutated by the Cloud Dataflow service;
1414          # callers cannot mutate it.
1415      "executionInfo": { # Deprecated. # Additional information about how a Cloud Dataflow job
1416          # will be executed that isn't contained in the submitted job.
1417        "stages": { # A mapping from each stage to the information about that stage.
1418          "a_key": { # Contains information about how a particular
1419              # google.dataflow.v1beta3.Step will be executed.
1420            "stepName": [ # The steps associated with the execution stage.
1421                # Note that stages may have several steps, and that a given step
1422                # might be run by more than one stage.
1423              "A String",
1424            ],
1425          },
1426        },
1427      },
1428    },
1429  }</pre>
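<p>The following sketch is illustrative only and is not part of the generated reference. It assumes the google-api-python-client and google-auth packages are installed, that application-default credentials are available, and that a template has already been staged at a hypothetical Cloud Storage path; the project, region, bucket, job name, and parameter values are placeholders.</p>
<pre>
# Hedged example: create a job from a staged template and inspect the returned Job.
from googleapiclient.discovery import build

# Build the Dataflow client from the public discovery document.
service = build('dataflow', 'v1b3')

# Request body for this method; jobName, gcsPath, parameters, and environment are
# standard fields of the create-from-template request.  All values are placeholders.
body = {
    'jobName': 'example-wordcount',
    'gcsPath': 'gs://my-bucket/templates/wordcount',
    'parameters': {
        'inputFile': 'gs://my-bucket/input.txt',
        'output': 'gs://my-bucket/output',
    },
    'environment': {
        'machineType': 'n1-standard-1',   # runtime environment fields described earlier on this page
        'zone': 'us-central1-f',
    },
}

job = service.projects().locations().templates().create(
    projectId='my-project', location='us-central1', body=body).execute()

# The response is the Job resource documented above; id is assigned by the service
# and currentState reflects the job's state at the time of the response.
print(job['id'], job.get('currentState'), job.get('type'))
</pre>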
1430</div>
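<p>A job created this way typically starts in a non-terminal state and transitions asynchronously, as described for the <code>currentState</code> field above. The polling sketch below is an assumption-laden illustration: it uses the companion <code>projects.locations.jobs.get</code> method, which is documented on a separate page rather than here, and the listed terminal states are a plausible subset of the <code>JobState</code> values, not an authoritative list.</p>
<pre>
# Hedged example: poll the Job resource until it reaches a terminal state.
import time

# Assumed subset of terminal JobState values; consult the JobState documentation
# for the authoritative list.
TERMINAL_STATES = {
    'JOB_STATE_DONE', 'JOB_STATE_FAILED',
    'JOB_STATE_CANCELLED', 'JOB_STATE_UPDATED', 'JOB_STATE_DRAINED',
}

def wait_for_job(service, project_id, location, job_id, poll_seconds=30):
    """Polls the job's currentState until it is terminal and returns the final Job."""
    while True:
        job = service.projects().locations().jobs().get(
            projectId=project_id, location=location, jobId=job_id).execute()
        state = job.get('currentState', 'JOB_STATE_UNKNOWN')
        if state in TERMINAL_STATES:
            return job
        time.sleep(poll_seconds)

# Usage with placeholders: final = wait_for_job(service, 'my-project', 'us-central1', job['id'])
</pre>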
1431
1432</body></html>