1<html><body>
2<style>
3
4body, h1, h2, h3, div, span, p, pre, a {
5  margin: 0;
6  padding: 0;
7  border: 0;
8  font-weight: inherit;
9  font-style: inherit;
10  font-size: 100%;
11  font-family: inherit;
12  vertical-align: baseline;
13}
14
15body {
16  font-size: 13px;
17  padding: 1em;
18}
19
20h1 {
21  font-size: 26px;
22  margin-bottom: 1em;
23}
24
25h2 {
26  font-size: 24px;
27  margin-bottom: 1em;
28}
29
30h3 {
31  font-size: 20px;
32  margin-bottom: 1em;
33  margin-top: 1em;
34}
35
36pre, code {
37  line-height: 1.5;
38  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39}
40
41pre {
42  margin-top: 0.5em;
43}
44
45h1, h2, h3, p {
46  font-family: Arial, sans serif;
47}
48
49h1, h2, h3 {
50  border-bottom: solid #CCC 1px;
51}
52
53.toc_element {
54  margin-top: 0.5em;
55}
56
57.firstline {
58  margin-left: 2 em;
59}
60
61.method  {
62  margin-top: 1em;
63  border: solid 1px #CCC;
64  padding: 1em;
65  background: #EEE;
66}
67
68.details {
69  font-weight: bold;
70  font-size: 14px;
71}
72
73</style>
74
75<h1><a href="dataflow_v1b3.html">Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.jobs.html">jobs</a></h1>
76<h2>Instance Methods</h2>
77<p class="toc_element">
78  <code><a href="dataflow_v1b3.projects.locations.jobs.debug.html">debug()</a></code>
79</p>
80<p class="firstline">Returns the debug Resource.</p>
81
82<p class="toc_element">
83  <code><a href="dataflow_v1b3.projects.locations.jobs.messages.html">messages()</a></code>
84</p>
85<p class="firstline">Returns the messages Resource.</p>
86
87<p class="toc_element">
88  <code><a href="dataflow_v1b3.projects.locations.jobs.snapshots.html">snapshots()</a></code>
89</p>
90<p class="firstline">Returns the snapshots Resource.</p>
91
92<p class="toc_element">
93  <code><a href="dataflow_v1b3.projects.locations.jobs.workItems.html">workItems()</a></code>
94</p>
95<p class="firstline">Returns the workItems Resource.</p>
96
97<p class="toc_element">
98  <code><a href="#create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
99<p class="firstline">Creates a Cloud Dataflow job.</p>
100<p class="toc_element">
101  <code><a href="#get">get(projectId, location, jobId, x__xgafv=None, view=None)</a></code></p>
102<p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
103<p class="toc_element">
104  <code><a href="#getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</a></code></p>
105<p class="firstline">Request the job status.</p>
106<p class="toc_element">
107  <code><a href="#list">list(projectId, location, pageSize=None, pageToken=None, x__xgafv=None, filter=None, view=None)</a></code></p>
108<p class="firstline">List the jobs of a project.</p>
109<p class="toc_element">
110  <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
111<p class="firstline">Retrieves the next page of results.</p>
112<p class="toc_element">
113  <code><a href="#snapshot">snapshot(projectId, location, jobId, body, x__xgafv=None)</a></code></p>
114<p class="firstline">Snapshot the state of a streaming job.</p>
115<p class="toc_element">
116  <code><a href="#update">update(projectId, location, jobId, body, x__xgafv=None)</a></code></p>
117<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
118<h3>Method Details</h3>
119<div class="method">
120    <code class="details" id="create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</code>
121  <pre>Creates a Cloud Dataflow job.
122
123To create a job, we recommend using `projects.locations.jobs.create` with a
124[regional endpoint]
125(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
126`projects.jobs.create` is not recommended, as your job will always start
127in `us-central1`.
128
129Args:
130  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
131  location: string, The [regional endpoint]
132(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
133contains this job. (required)
134  body: object, The request body. (required)
135    The object takes the form of:
136
137{ # Defines a job to be run by the Cloud Dataflow service.
138  "labels": { # User-defined labels for this job.
139      #
140      # The labels map can contain no more than 64 entries.  Entries of the labels
141      # map are UTF8 strings that comply with the following restrictions:
142      #
143      # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
144      # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
145      # * Both keys and values are additionally constrained to be <= 128 bytes in
146      # size.
147    "a_key": "A String",
148  },
149  "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
150      # by the metadata values provided here. Populated for ListJobs and all GetJob
151      # views SUMMARY and higher.
152      # ListJob response and Job SUMMARY view.
153    "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
154      "versionDisplayName": "A String", # A readable string describing the version of the SDK.
155      "version": "A String", # The version of the SDK used to run the job.
156      "sdkSupportStatus": "A String", # The support status for this SDK version.
157    },
158    "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
159      { # Metadata for a PubSub connector used by the job.
160        "topic": "A String", # Topic accessed in the connection.
161        "subscription": "A String", # Subscription used in the connection.
162      },
163    ],
164    "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
165      { # Metadata for a Datastore connector used by the job.
166        "projectId": "A String", # ProjectId accessed in the connection.
167        "namespace": "A String", # Namespace used in the connection.
168      },
169    ],
170    "fileDetails": [ # Identification of a File source used in the Dataflow job.
171      { # Metadata for a File connector used by the job.
172        "filePattern": "A String", # File Pattern used to access files by the connector.
173      },
174    ],
175    "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
176      { # Metadata for a Spanner connector used by the job.
177        "instanceId": "A String", # InstanceId accessed in the connection.
178        "projectId": "A String", # ProjectId accessed in the connection.
179        "databaseId": "A String", # DatabaseId accessed in the connection.
180      },
181    ],
182    "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
183      { # Metadata for a BigTable connector used by the job.
184        "instanceId": "A String", # InstanceId accessed in the connection.
185        "projectId": "A String", # ProjectId accessed in the connection.
186        "tableId": "A String", # TableId accessed in the connection.
187      },
188    ],
189    "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
190      { # Metadata for a BigQuery connector used by the job.
191        "projectId": "A String", # Project accessed in the connection.
192        "dataset": "A String", # Dataset accessed in the connection.
193        "table": "A String", # Table accessed in the connection.
194        "query": "A String", # Query used to access data in the connection.
195      },
196    ],
197  },
198  "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
199      # A description of the user pipeline and stages through which it is executed.
200      # Created by Cloud Dataflow service.  Only retrieved with
201      # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
202      # form.  This data is provided by the Dataflow service for ease of visualizing
203      # the pipeline and interpreting Dataflow provided metrics.
204    "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
205      { # Description of the type, names/ids, and input/outputs for a transform.
206        "kind": "A String", # Type of transform.
207        "name": "A String", # User provided name for this transform instance.
208        "inputCollectionName": [ # User names for all collection inputs to this transform.
209          "A String",
210        ],
211        "displayData": [ # Transform-specific display data.
212          { # Data provided with a pipeline or transform to provide descriptive info.
213            "shortStrValue": "A String", # A possible additional shorter value to display.
214                # For example a java_class_name_value of com.mypackage.MyDoFn
215                # will be stored with MyDoFn as the short_str_value and
216                # com.mypackage.MyDoFn as the java_class_name value.
217                # short_str_value can be displayed and java_class_name_value
218                # will be displayed as a tooltip.
219            "durationValue": "A String", # Contains value if the data is of duration type.
220            "url": "A String", # An optional full URL.
221            "floatValue": 3.14, # Contains value if the data is of float type.
222            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
223                # language namespace (i.e. python module) which defines the display data.
224                # This allows a dax monitoring system to specially handle the data
225                # and perform custom rendering.
226            "javaClassValue": "A String", # Contains value if the data is of java class type.
227            "label": "A String", # An optional label to display in a dax UI for the element.
228            "boolValue": True or False, # Contains value if the data is of a boolean type.
229            "strValue": "A String", # Contains value if the data is of string type.
230            "key": "A String", # The key identifying the display data.
231                # This is intended to be used as a label for the display data
232                # when viewed in a dax monitoring system.
233            "int64Value": "A String", # Contains value if the data is of int64 type.
234            "timestampValue": "A String", # Contains value if the data is of timestamp type.
235          },
236        ],
237        "outputCollectionName": [ # User  names for all collection outputs to this transform.
238          "A String",
239        ],
240        "id": "A String", # SDK generated id of this transform instance.
241      },
242    ],
243    "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
244      { # Description of the composing transforms, names/ids, and input/outputs of a
245          # stage of execution.  Some composing transforms and sources may have been
246          # generated by the Dataflow service during execution planning.
247        "componentSource": [ # Collections produced and consumed by component transforms of this stage.
248          { # Description of an interstitial value between transforms in an execution
249              # stage.
250            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
251            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
252                # source is most closely associated.
253            "name": "A String", # Dataflow service generated name for this source.
254          },
255        ],
256        "kind": "A String", # Type of tranform this stage is executing.
257        "name": "A String", # Dataflow service generated name for this stage.
258        "outputSource": [ # Output sources for this stage.
259          { # Description of an input or output of an execution stage.
260            "userName": "A String", # Human-readable name for this source; may be user or system generated.
261            "sizeBytes": "A String", # Size of the source, if measurable.
262            "name": "A String", # Dataflow service generated name for this source.
263            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
264                # source is most closely associated.
265          },
266        ],
267        "inputSource": [ # Input sources for this stage.
268          { # Description of an input or output of an execution stage.
269            "userName": "A String", # Human-readable name for this source; may be user or system generated.
270            "sizeBytes": "A String", # Size of the source, if measurable.
271            "name": "A String", # Dataflow service generated name for this source.
272            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
273                # source is most closely associated.
274          },
275        ],
276        "componentTransform": [ # Transforms that comprise this execution stage.
277          { # Description of a transform executed as part of an execution stage.
278            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
279            "originalTransform": "A String", # User name for the original user transform with which this transform is
280                # most closely associated.
281            "name": "A String", # Dataflow service generated name for this source.
282          },
283        ],
284        "id": "A String", # Dataflow service generated id for this stage.
285      },
286    ],
287    "displayData": [ # Pipeline level display data.
288      { # Data provided with a pipeline or transform to provide descriptive info.
289        "shortStrValue": "A String", # A possible additional shorter value to display.
290            # For example a java_class_name_value of com.mypackage.MyDoFn
291            # will be stored with MyDoFn as the short_str_value and
292            # com.mypackage.MyDoFn as the java_class_name value.
293            # short_str_value can be displayed and java_class_name_value
294            # will be displayed as a tooltip.
295        "durationValue": "A String", # Contains value if the data is of duration type.
296        "url": "A String", # An optional full URL.
297        "floatValue": 3.14, # Contains value if the data is of float type.
298        "namespace": "A String", # The namespace for the key. This is usually a class name or programming
299            # language namespace (i.e. python module) which defines the display data.
300            # This allows a dax monitoring system to specially handle the data
301            # and perform custom rendering.
302        "javaClassValue": "A String", # Contains value if the data is of java class type.
303        "label": "A String", # An optional label to display in a dax UI for the element.
304        "boolValue": True or False, # Contains value if the data is of a boolean type.
305        "strValue": "A String", # Contains value if the data is of string type.
306        "key": "A String", # The key identifying the display data.
307            # This is intended to be used as a label for the display data
308            # when viewed in a dax monitoring system.
309        "int64Value": "A String", # Contains value if the data is of int64 type.
310        "timestampValue": "A String", # Contains value if the data is of timestamp type.
311      },
312    ],
313  },
314  "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
315      # callers cannot mutate it.
316    { # A message describing the state of a particular execution stage.
317      "executionStageName": "A String", # The name of the execution stage.
318      "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
319      "currentStateTime": "A String", # The time at which the stage transitioned to this state.
320    },
321  ],
322  "id": "A String", # The unique ID of this job.
323      #
324      # This field is set by the Cloud Dataflow service when the Job is
325      # created, and is immutable for the life of the job.
326  "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
327      # `JOB_STATE_UPDATED`), this field contains the ID of that job.
328  "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
329  "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
330      # corresponding name prefixes of the new job.
331    "a_key": "A String",
332  },
333  "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
334    "version": { # A structure describing which components and their versions of the service
335        # are required in order to run the job.
336      "a_key": "", # Properties of the object.
337    },
338    "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
339    "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
340        # at rest, AKA a Customer Managed Encryption Key (CMEK).
341        #
342        # Format:
343        #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
344    "internalExperiments": { # Experimental settings.
345      "a_key": "", # Properties of the object. Contains field @type with type URL.
346    },
347    "dataset": "A String", # The dataset for the current project where various workflow
348        # related tables are stored.
349        #
350        # The supported resource type is:
351        #
352        # Google BigQuery:
353        #   bigquery.googleapis.com/{dataset}
354    "experiments": [ # The list of experiments to enable.
355      "A String",
356    ],
357    "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
358    "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
359        # options are passed through the service and are used to recreate the
360        # SDK pipeline options on the worker in a language agnostic and platform
361        # independent way.
362      "a_key": "", # Properties of the object.
363    },
364    "userAgent": { # A description of the process that generated the request.
365      "a_key": "", # Properties of the object.
366    },
367    "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
368        # unspecified, the service will attempt to choose a reasonable
369        # default.  This should be in the form of the API service name,
370        # e.g. "compute.googleapis.com".
371    "workerPools": [ # The worker pools. At least one "harness" worker pool must be
372        # specified in order for the job to have workers.
373      { # Describes one particular pool of Cloud Dataflow workers to be
374          # instantiated by the Cloud Dataflow service in order to perform the
375          # computations required by a job.  Note that a workflow job may use
376          # multiple pools, in order to match the various computational
377          # requirements of the various stages of the job.
378        "diskSourceImage": "A String", # Fully qualified source image for disks.
379        "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
380            # using the standard Dataflow task runner.  Users should ignore
381            # this field.
382          "workflowFileName": "A String", # The file to store the workflow in.
383          "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
384              # will not be uploaded.
385              #
386              # The supported resource type is:
387              #
388              # Google Cloud Storage:
389              #   storage.googleapis.com/{bucket}/{object}
390              #   bucket.storage.googleapis.com/{object}
391          "commandlinesFileName": "A String", # The file to store preprocessing commands in.
392          "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
393            "reportingEnabled": True or False, # Whether to send work progress updates to the service.
394            "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
395                # "shuffle/v1beta1".
396            "workerId": "A String", # The ID of the worker running this pipeline.
397            "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
398                #
399                # When workers access Google Cloud APIs, they logically do so via
400                # relative URLs.  If this field is specified, it supplies the base
401                # URL to use for resolving these relative URLs.  The normative
402                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
403                # Locators".
404                #
405                # If not specified, the default value is "http://www.googleapis.com/"
406            "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
407                # "dataflow/v1b3/projects".
408            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
409                # storage.
410                #
411                # The supported resource type is:
412                #
413                # Google Cloud Storage:
414                #
415                #   storage.googleapis.com/{bucket}/{object}
416                #   bucket.storage.googleapis.com/{object}
417          },
418          "vmId": "A String", # The ID string of the VM.
419          "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
420          "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
421          "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
422              # access the Cloud Dataflow API.
423            "A String",
424          ],
425          "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
426              # taskrunner; e.g. "root".
427          "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
428              #
429              # When workers access Google Cloud APIs, they logically do so via
430              # relative URLs.  If this field is specified, it supplies the base
431              # URL to use for resolving these relative URLs.  The normative
432              # algorithm used is defined by RFC 1808, "Relative Uniform Resource
433              # Locators".
434              #
435              # If not specified, the default value is "http://www.googleapis.com/"
436          "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
437              # taskrunner; e.g. "wheel".
438          "languageHint": "A String", # The suggested backend language.
439          "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
440              # console.
441          "streamingWorkerMainClass": "A String", # The streaming worker main class name.
442          "logDir": "A String", # The directory on the VM to store logs.
443          "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
444          "harnessCommand": "A String", # The command to launch the worker harness.
445          "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
446              # temporary storage.
447              #
448              # The supported resource type is:
449              #
450              # Google Cloud Storage:
451              #   storage.googleapis.com/{bucket}/{object}
452              #   bucket.storage.googleapis.com/{object}
453          "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
454        },
455        "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
456            # are supported.
457        "packages": [ # Packages to be installed on workers.
458          { # The packages that must be installed in order for a worker to run the
459              # steps of the Cloud Dataflow job that will be assigned to its worker
460              # pool.
461              #
462              # This is the mechanism by which the Cloud Dataflow SDK causes code to
463              # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
464              # might use this to install jars containing the user's code and all of the
465              # various dependencies (libraries, data files, etc.) required in order
466              # for that code to run.
467            "location": "A String", # The resource to read the package from. The supported resource type is:
468                #
469                # Google Cloud Storage:
470                #
471                #   storage.googleapis.com/{bucket}
472                #   bucket.storage.googleapis.com/
473            "name": "A String", # The name of the package.
474          },
475        ],
476        "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
477            # service will attempt to choose a reasonable default.
478        "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
479            # the service will use the network "default".
480        "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
481            # will attempt to choose a reasonable default.
482        "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
483            # attempt to choose a reasonable default.
484        "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
485            # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
486            # `TEARDOWN_NEVER`.
487            # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
488            # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
489            # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
490            # down.
491            #
492            # If the workers are not torn down by the service, they will
493            # continue to run and use Google Compute Engine VM resources in the
494            # user's project until they are explicitly terminated by the user.
495            # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
496            # policy except for small, manually supervised test jobs.
497            #
498            # If unknown or unspecified, the service will attempt to choose a reasonable
499            # default.
500        "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
501            # Compute Engine API.
502        "ipConfiguration": "A String", # Configuration for VM IPs.
503        "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
504            # service will choose a number of threads (according to the number of cores
505            # on the selected machine type for batch, or 1 by convention for streaming).
506        "poolArgs": { # Extra arguments for this worker pool.
507          "a_key": "", # Properties of the object. Contains field @type with type URL.
508        },
509        "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
510            # execute the job.  If zero or unspecified, the service will
511            # attempt to choose a reasonable default.
512        "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
513            # harness, residing in Google Container Registry.
514        "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
515            # the form "regions/REGION/subnetworks/SUBNETWORK".
516        "dataDisks": [ # Data disks that are used by a VM in this workflow.
517          { # Describes the data disk used by a workflow job.
518            "mountPoint": "A String", # Directory in a VM where disk is mounted.
519            "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
520                # attempt to choose a reasonable default.
521            "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
522                # must be a disk type appropriate to the project and zone in which
523                # the workers will run.  If unknown or unspecified, the service
524                # will attempt to choose a reasonable default.
525                #
526                # For example, the standard persistent disk type is a resource name
527                # typically ending in "pd-standard".  If SSD persistent disks are
528                # available, the resource name typically ends with "pd-ssd".  The
529                # actual valid values are defined the Google Compute Engine API,
530                # not by the Cloud Dataflow API; consult the Google Compute Engine
531                # documentation for more information about determining the set of
532                # available disk types for a particular project and zone.
533                #
534                # Google Compute Engine Disk types are local to a particular
535                # project in a particular zone, and so the resource name will
536                # typically look something like this:
537                #
538                # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
539          },
540        ],
541        "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
542          "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
543          "algorithm": "A String", # The algorithm to use for autoscaling.
544        },
545        "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
546            # select a default set of packages which are useful to worker
547            # harnesses written in a particular language.
548        "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
549            # attempt to choose a reasonable default.
550        "metadata": { # Metadata to set on the Google Compute Engine VMs.
551          "a_key": "A String",
552        },
553      },
554    ],
555    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
556        # storage.  The system will append the suffix "/temp-{JOBNAME} to
557        # this resource prefix, where {JOBNAME} is the value of the
558        # job_name field.  The resulting bucket and object prefix is used
559        # as the prefix of the resources used to store temporary data
560        # needed during the job execution.  NOTE: This will override the
561        # value in taskrunner_settings.
562        # The supported resource type is:
563        #
564        # Google Cloud Storage:
565        #
566        #   storage.googleapis.com/{bucket}/{object}
567        #   bucket.storage.googleapis.com/{object}
568  },
569  "location": "A String", # The [regional endpoint]
570      # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
571      # contains this job.
572  "tempFiles": [ # A set of files the system should be aware of that are used
573      # for temporary storage. These temporary files will be
574      # removed on job completion.
575      # No duplicates are allowed.
576      # No file patterns are supported.
577      #
578      # The supported files are:
579      #
580      # Google Cloud Storage:
581      #
582      #    storage.googleapis.com/{bucket}/{object}
583      #    bucket.storage.googleapis.com/{object}
584    "A String",
585  ],
586  "type": "A String", # The type of Cloud Dataflow job.
587  "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
588      # If this field is set, the service will ensure its uniqueness.
589      # The request to create a job will fail if the service has knowledge of a
590      # previously submitted job with the same client's ID and job name.
591      # The caller may use this field to ensure idempotence of job
592      # creation across retried attempts to create a job.
593      # By default, the field is empty and, in that case, the service ignores it.
594  "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
595      # snapshot.
596  "stepsLocation": "A String", # The GCS location where the steps are stored.
597  "currentStateTime": "A String", # The timestamp associated with the current state.
598  "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
599      # Flexible resource scheduling jobs are started with some delay after job
600      # creation, so start_time is unset before start and is updated when the
601      # job is started by the Cloud Dataflow service. For other jobs, start_time
602      # always equals to create_time and is immutable and set by the Cloud Dataflow
603      # service.
604  "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
605      # Cloud Dataflow service.
606  "requestedState": "A String", # The job's requested state.
607      #
608      # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
609      # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
610      # also be used to directly set a job's requested state to
611      # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
612      # job if it has not already reached a terminal state.
613  "name": "A String", # The user-specified Cloud Dataflow job name.
614      #
615      # Only one Job with a given name may exist in a project at any
616      # given time. If a caller attempts to create a Job with the same
617      # name as an already-existing Job, the attempt returns the
618      # existing Job.
619      #
620      # The name must match the regular expression
621      # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
622  "steps": [ # Exactly one of step or steps_location should be specified.
623      #
624      # The top-level steps that constitute the entire job.
625    { # Defines a particular step within a Cloud Dataflow job.
626        #
627        # A job consists of multiple steps, each of which performs some
628        # specific operation as part of the overall job.  Data is typically
629        # passed from one step to another as part of the job.
630        #
631        # Here's an example of a sequence of steps which together implement a
632        # Map-Reduce job:
633        #
634        #   * Read a collection of data from some source, parsing the
635        #     collection's elements.
636        #
637        #   * Validate the elements.
638        #
639        #   * Apply a user-defined function to map each element to some value
640        #     and extract an element-specific key value.
641        #
642        #   * Group elements with the same key into a single element with
643        #     that key, transforming a multiply-keyed collection into a
644        #     uniquely-keyed collection.
645        #
646        #   * Write the elements out to some data sink.
647        #
648        # Note that the Cloud Dataflow service may be used to run many different
649        # types of jobs, not just Map-Reduce.
650      "kind": "A String", # The kind of step in the Cloud Dataflow job.
651      "properties": { # Named properties associated with the step. Each kind of
652          # predefined step has its own required set of properties.
653          # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
654        "a_key": "", # Properties of the object.
655      },
656      "name": "A String", # The name that identifies the step. This must be unique for each
657          # step with respect to all other steps in the Cloud Dataflow job.
658    },
659  ],
660  "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
661      # of the job it replaced.
662      #
663      # When sending a `CreateJobRequest`, you can update a job by specifying it
664      # here. The job named here is stopped, and its intermediate state is
665      # transferred to this job.
666  "currentState": "A String", # The current state of the job.
667      #
668      # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
669      # specified.
670      #
671      # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
672      # terminal state. After a job has reached a terminal state, no
673      # further state updates may be made.
674      #
675      # This field may be mutated by the Cloud Dataflow service;
676      # callers cannot mutate it.
677  "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
678      # isn't contained in the submitted job.
679    "stages": { # A mapping from each stage to the information about that stage.
680      "a_key": { # Contains information about how a particular
681          # google.dataflow.v1beta3.Step will be executed.
682        "stepName": [ # The steps associated with the execution stage.
683            # Note that stages may have several steps, and that a given step
684            # might be run by more than one stage.
685          "A String",
686        ],
687      },
688    },
689  },
690}
691
692  x__xgafv: string, V1 error format.
693    Allowed values
694      1 - v1 error format
695      2 - v2 error format
696  replaceJobId: string, Deprecated. This field is now in the Job message.
697  view: string, The level of information requested in response.
698
699Returns:
700  An object of the form:
701
702    { # Defines a job to be run by the Cloud Dataflow service.
703    "labels": { # User-defined labels for this job.
704        #
705        # The labels map can contain no more than 64 entries.  Entries of the labels
706        # map are UTF8 strings that comply with the following restrictions:
707        #
708        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
709        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
710        # * Both keys and values are additionally constrained to be <= 128 bytes in
711        # size.
712      "a_key": "A String",
713    },
714    "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
715        # by the metadata values provided here. Populated for ListJobs and all GetJob
716        # views SUMMARY and higher.
717        # ListJob response and Job SUMMARY view.
718      "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
719        "versionDisplayName": "A String", # A readable string describing the version of the SDK.
720        "version": "A String", # The version of the SDK used to run the job.
721        "sdkSupportStatus": "A String", # The support status for this SDK version.
722      },
723      "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
724        { # Metadata for a PubSub connector used by the job.
725          "topic": "A String", # Topic accessed in the connection.
726          "subscription": "A String", # Subscription used in the connection.
727        },
728      ],
729      "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
730        { # Metadata for a Datastore connector used by the job.
731          "projectId": "A String", # ProjectId accessed in the connection.
732          "namespace": "A String", # Namespace used in the connection.
733        },
734      ],
735      "fileDetails": [ # Identification of a File source used in the Dataflow job.
736        { # Metadata for a File connector used by the job.
737          "filePattern": "A String", # File Pattern used to access files by the connector.
738        },
739      ],
740      "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
741        { # Metadata for a Spanner connector used by the job.
742          "instanceId": "A String", # InstanceId accessed in the connection.
743          "projectId": "A String", # ProjectId accessed in the connection.
744          "databaseId": "A String", # DatabaseId accessed in the connection.
745        },
746      ],
747      "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
748        { # Metadata for a BigTable connector used by the job.
749          "instanceId": "A String", # InstanceId accessed in the connection.
750          "projectId": "A String", # ProjectId accessed in the connection.
751          "tableId": "A String", # TableId accessed in the connection.
752        },
753      ],
754      "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
755        { # Metadata for a BigQuery connector used by the job.
756          "projectId": "A String", # Project accessed in the connection.
757          "dataset": "A String", # Dataset accessed in the connection.
758          "table": "A String", # Table accessed in the connection.
759          "query": "A String", # Query used to access data in the connection.
760        },
761      ],
762    },
763    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
764        # A description of the user pipeline and stages through which it is executed.
765        # Created by Cloud Dataflow service.  Only retrieved with
766        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
767        # form.  This data is provided by the Dataflow service for ease of visualizing
768        # the pipeline and interpreting Dataflow provided metrics.
769      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
770        { # Description of the type, names/ids, and input/outputs for a transform.
771          "kind": "A String", # Type of transform.
772          "name": "A String", # User provided name for this transform instance.
773          "inputCollectionName": [ # User names for all collection inputs to this transform.
774            "A String",
775          ],
776          "displayData": [ # Transform-specific display data.
777            { # Data provided with a pipeline or transform to provide descriptive info.
778              "shortStrValue": "A String", # A possible additional shorter value to display.
779                  # For example a java_class_name_value of com.mypackage.MyDoFn
780                  # will be stored with MyDoFn as the short_str_value and
781                  # com.mypackage.MyDoFn as the java_class_name value.
782                  # short_str_value can be displayed and java_class_name_value
783                  # will be displayed as a tooltip.
784              "durationValue": "A String", # Contains value if the data is of duration type.
785              "url": "A String", # An optional full URL.
786              "floatValue": 3.14, # Contains value if the data is of float type.
787              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
788                  # language namespace (i.e. python module) which defines the display data.
789                  # This allows a dax monitoring system to specially handle the data
790                  # and perform custom rendering.
791              "javaClassValue": "A String", # Contains value if the data is of java class type.
792              "label": "A String", # An optional label to display in a dax UI for the element.
793              "boolValue": True or False, # Contains value if the data is of a boolean type.
794              "strValue": "A String", # Contains value if the data is of string type.
795              "key": "A String", # The key identifying the display data.
796                  # This is intended to be used as a label for the display data
797                  # when viewed in a dax monitoring system.
798              "int64Value": "A String", # Contains value if the data is of int64 type.
799              "timestampValue": "A String", # Contains value if the data is of timestamp type.
800            },
801          ],
802          "outputCollectionName": [ # User  names for all collection outputs to this transform.
803            "A String",
804          ],
805          "id": "A String", # SDK generated id of this transform instance.
806        },
807      ],
808      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
809        { # Description of the composing transforms, names/ids, and input/outputs of a
810            # stage of execution.  Some composing transforms and sources may have been
811            # generated by the Dataflow service during execution planning.
812          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
813            { # Description of an interstitial value between transforms in an execution
814                # stage.
815              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
816              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
817                  # source is most closely associated.
818              "name": "A String", # Dataflow service generated name for this source.
819            },
820          ],
821          "kind": "A String", # Type of tranform this stage is executing.
822          "name": "A String", # Dataflow service generated name for this stage.
823          "outputSource": [ # Output sources for this stage.
824            { # Description of an input or output of an execution stage.
825              "userName": "A String", # Human-readable name for this source; may be user or system generated.
826              "sizeBytes": "A String", # Size of the source, if measurable.
827              "name": "A String", # Dataflow service generated name for this source.
828              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
829                  # source is most closely associated.
830            },
831          ],
832          "inputSource": [ # Input sources for this stage.
833            { # Description of an input or output of an execution stage.
834              "userName": "A String", # Human-readable name for this source; may be user or system generated.
835              "sizeBytes": "A String", # Size of the source, if measurable.
836              "name": "A String", # Dataflow service generated name for this source.
837              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
838                  # source is most closely associated.
839            },
840          ],
841          "componentTransform": [ # Transforms that comprise this execution stage.
842            { # Description of a transform executed as part of an execution stage.
843              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
844              "originalTransform": "A String", # User name for the original user transform with which this transform is
845                  # most closely associated.
846              "name": "A String", # Dataflow service generated name for this source.
847            },
848          ],
849          "id": "A String", # Dataflow service generated id for this stage.
850        },
851      ],
852      "displayData": [ # Pipeline level display data.
853        { # Data provided with a pipeline or transform to provide descriptive info.
854          "shortStrValue": "A String", # A possible additional shorter value to display.
855              # For example a java_class_name_value of com.mypackage.MyDoFn
856              # will be stored with MyDoFn as the short_str_value and
857              # com.mypackage.MyDoFn as the java_class_name value.
858              # short_str_value can be displayed and java_class_name_value
859              # will be displayed as a tooltip.
860          "durationValue": "A String", # Contains value if the data is of duration type.
861          "url": "A String", # An optional full URL.
862          "floatValue": 3.14, # Contains value if the data is of float type.
863          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
864              # language namespace (i.e. python module) which defines the display data.
865              # This allows a dax monitoring system to specially handle the data
866              # and perform custom rendering.
867          "javaClassValue": "A String", # Contains value if the data is of java class type.
868          "label": "A String", # An optional label to display in a dax UI for the element.
869          "boolValue": True or False, # Contains value if the data is of a boolean type.
870          "strValue": "A String", # Contains value if the data is of string type.
871          "key": "A String", # The key identifying the display data.
872              # This is intended to be used as a label for the display data
873              # when viewed in a dax monitoring system.
874          "int64Value": "A String", # Contains value if the data is of int64 type.
875          "timestampValue": "A String", # Contains value if the data is of timestamp type.
876        },
877      ],
878    },
879    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
880        # callers cannot mutate it.
881      { # A message describing the state of a particular execution stage.
882        "executionStageName": "A String", # The name of the execution stage.
883        "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
884        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
885      },
886    ],
887    "id": "A String", # The unique ID of this job.
888        #
889        # This field is set by the Cloud Dataflow service when the Job is
890        # created, and is immutable for the life of the job.
891    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
892        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
893    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
894    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
895        # corresponding name prefixes of the new job.
896      "a_key": "A String",
897    },
898    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
899      "version": { # A structure describing which components and their versions of the service
900          # are required in order to run the job.
901        "a_key": "", # Properties of the object.
902      },
903      "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
904      "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
905          # at rest, AKA a Customer Managed Encryption Key (CMEK).
906          #
907          # Format:
908          #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
909      "internalExperiments": { # Experimental settings.
910        "a_key": "", # Properties of the object. Contains field @type with type URL.
911      },
912      "dataset": "A String", # The dataset for the current project where various workflow
913          # related tables are stored.
914          #
915          # The supported resource type is:
916          #
917          # Google BigQuery:
918          #   bigquery.googleapis.com/{dataset}
919      "experiments": [ # The list of experiments to enable.
920        "A String",
921      ],
922      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
923      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
924          # options are passed through the service and are used to recreate the
925          # SDK pipeline options on the worker in a language agnostic and platform
926          # independent way.
927        "a_key": "", # Properties of the object.
928      },
929      "userAgent": { # A description of the process that generated the request.
930        "a_key": "", # Properties of the object.
931      },
932      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
933          # unspecified, the service will attempt to choose a reasonable
934          # default.  This should be in the form of the API service name,
935          # e.g. "compute.googleapis.com".
936      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
937          # specified in order for the job to have workers.
938        { # Describes one particular pool of Cloud Dataflow workers to be
939            # instantiated by the Cloud Dataflow service in order to perform the
940            # computations required by a job.  Note that a workflow job may use
941            # multiple pools, in order to match the various computational
942            # requirements of the various stages of the job.
943          "diskSourceImage": "A String", # Fully qualified source image for disks.
944          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
945              # using the standard Dataflow task runner.  Users should ignore
946              # this field.
947            "workflowFileName": "A String", # The file to store the workflow in.
948            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
949                # will not be uploaded.
950                #
951                # The supported resource type is:
952                #
953                # Google Cloud Storage:
954                #   storage.googleapis.com/{bucket}/{object}
955                #   bucket.storage.googleapis.com/{object}
956            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
957            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
958              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
959              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
960                  # "shuffle/v1beta1".
961              "workerId": "A String", # The ID of the worker running this pipeline.
962              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
963                  #
964                  # When workers access Google Cloud APIs, they logically do so via
965                  # relative URLs.  If this field is specified, it supplies the base
966                  # URL to use for resolving these relative URLs.  The normative
967                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
968                  # Locators".
969                  #
970                  # If not specified, the default value is "http://www.googleapis.com/"
971              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
972                  # "dataflow/v1b3/projects".
973              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
974                  # storage.
975                  #
976                  # The supported resource type is:
977                  #
978                  # Google Cloud Storage:
979                  #
980                  #   storage.googleapis.com/{bucket}/{object}
981                  #   bucket.storage.googleapis.com/{object}
982            },
983            "vmId": "A String", # The ID string of the VM.
984            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
985            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
986            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
987                # access the Cloud Dataflow API.
988              "A String",
989            ],
990            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
991                # taskrunner; e.g. "root".
992            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
993                #
994                # When workers access Google Cloud APIs, they logically do so via
995                # relative URLs.  If this field is specified, it supplies the base
996                # URL to use for resolving these relative URLs.  The normative
997                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
998                # Locators".
999                #
1000                # If not specified, the default value is "http://www.googleapis.com/"
1001            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1002                # taskrunner; e.g. "wheel".
1003            "languageHint": "A String", # The suggested backend language.
1004            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1005                # console.
1006            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1007            "logDir": "A String", # The directory on the VM to store logs.
1008            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1009            "harnessCommand": "A String", # The command to launch the worker harness.
1010            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1011                # temporary storage.
1012                #
1013                # The supported resource type is:
1014                #
1015                # Google Cloud Storage:
1016                #   storage.googleapis.com/{bucket}/{object}
1017                #   bucket.storage.googleapis.com/{object}
1018            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1019          },
1020          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1021              # are supported.
1022          "packages": [ # Packages to be installed on workers.
1023            { # The packages that must be installed in order for a worker to run the
1024                # steps of the Cloud Dataflow job that will be assigned to its worker
1025                # pool.
1026                #
1027                # This is the mechanism by which the Cloud Dataflow SDK causes code to
1028                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1029                # might use this to install jars containing the user's code and all of the
1030                # various dependencies (libraries, data files, etc.) required in order
1031                # for that code to run.
1032              "location": "A String", # The resource to read the package from. The supported resource type is:
1033                  #
1034                  # Google Cloud Storage:
1035                  #
1036                  #   storage.googleapis.com/{bucket}
1037                  #   bucket.storage.googleapis.com/
1038              "name": "A String", # The name of the package.
1039            },
1040          ],
1041          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
1042              # service will attempt to choose a reasonable default.
1043          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
1044              # the service will use the network "default".
1045          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
1046              # will attempt to choose a reasonable default.
1047          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
1048              # attempt to choose a reasonable default.
1049          "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
1050              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1051              # `TEARDOWN_NEVER`.
1052              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1053              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1054              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1055              # down.
1056              #
1057              # If the workers are not torn down by the service, they will
1058              # continue to run and use Google Compute Engine VM resources in the
1059              # user's project until they are explicitly terminated by the user.
1060              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1061              # policy except for small, manually supervised test jobs.
1062              #
1063              # If unknown or unspecified, the service will attempt to choose a reasonable
1064              # default.
1065          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1066              # Compute Engine API.
1067          "ipConfiguration": "A String", # Configuration for VM IPs.
1068          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1069              # service will choose a number of threads (according to the number of cores
1070              # on the selected machine type for batch, or 1 by convention for streaming).
1071          "poolArgs": { # Extra arguments for this worker pool.
1072            "a_key": "", # Properties of the object. Contains field @type with type URL.
1073          },
1074          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1075              # execute the job.  If zero or unspecified, the service will
1076              # attempt to choose a reasonable default.
1077          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1078              # harness, residing in Google Container Registry.
1079          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
1080              # the form "regions/REGION/subnetworks/SUBNETWORK".
1081          "dataDisks": [ # Data disks that are used by a VM in this workflow.
1082            { # Describes the data disk used by a workflow job.
1083              "mountPoint": "A String", # Directory in a VM where disk is mounted.
1084              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
1085                  # attempt to choose a reasonable default.
1086              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
1087                  # must be a disk type appropriate to the project and zone in which
1088                  # the workers will run.  If unknown or unspecified, the service
1089                  # will attempt to choose a reasonable default.
1090                  #
1091                  # For example, the standard persistent disk type is a resource name
1092                  # typically ending in "pd-standard".  If SSD persistent disks are
1093                  # available, the resource name typically ends with "pd-ssd".  The
1094                  # actual valid values are defined the Google Compute Engine API,
1095                  # not by the Cloud Dataflow API; consult the Google Compute Engine
1096                  # documentation for more information about determining the set of
1097                  # available disk types for a particular project and zone.
1098                  #
1099                  # Google Compute Engine Disk types are local to a particular
1100                  # project in a particular zone, and so the resource name will
1101                  # typically look something like this:
1102                  #
1103                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1104            },
1105          ],
1106          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1107            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1108            "algorithm": "A String", # The algorithm to use for autoscaling.
1109          },
1110          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
1111              # select a default set of packages which are useful to worker
1112              # harnesses written in a particular language.
1113          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
1114              # attempt to choose a reasonable default.
1115          "metadata": { # Metadata to set on the Google Compute Engine VMs.
1116            "a_key": "A String",
1117          },
1118        },
1119      ],
1120      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1121          # storage.  The system will append the suffix "/temp-{JOBNAME} to
1122          # this resource prefix, where {JOBNAME} is the value of the
1123          # job_name field.  The resulting bucket and object prefix is used
1124          # as the prefix of the resources used to store temporary data
1125          # needed during the job execution.  NOTE: This will override the
1126          # value in taskrunner_settings.
1127          # The supported resource type is:
1128          #
1129          # Google Cloud Storage:
1130          #
1131          #   storage.googleapis.com/{bucket}/{object}
1132          #   bucket.storage.googleapis.com/{object}
1133    },
1134    "location": "A String", # The [regional endpoint]
1135        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1136        # contains this job.
1137    "tempFiles": [ # A set of files the system should be aware of that are used
1138        # for temporary storage. These temporary files will be
1139        # removed on job completion.
1140        # No duplicates are allowed.
1141        # No file patterns are supported.
1142        #
1143        # The supported files are:
1144        #
1145        # Google Cloud Storage:
1146        #
1147        #    storage.googleapis.com/{bucket}/{object}
1148        #    bucket.storage.googleapis.com/{object}
1149      "A String",
1150    ],
1151    "type": "A String", # The type of Cloud Dataflow job.
1152    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1153        # If this field is set, the service will ensure its uniqueness.
1154        # The request to create a job will fail if the service has knowledge of a
1155        # previously submitted job with the same client's ID and job name.
1156        # The caller may use this field to ensure idempotence of job
1157        # creation across retried attempts to create a job.
1158        # By default, the field is empty and, in that case, the service ignores it.
1159    "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
1160        # snapshot.
1161    "stepsLocation": "A String", # The GCS location where the steps are stored.
1162    "currentStateTime": "A String", # The timestamp associated with the current state.
1163    "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
1164        # Flexible resource scheduling jobs are started with some delay after job
1165        # creation, so start_time is unset before start and is updated when the
1166        # job is started by the Cloud Dataflow service. For other jobs, start_time
1167        # always equals to create_time and is immutable and set by the Cloud Dataflow
1168        # service.
1169    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1170        # Cloud Dataflow service.
1171    "requestedState": "A String", # The job's requested state.
1172        #
1173        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1174        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
1175        # also be used to directly set a job's requested state to
1176        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1177        # job if it has not already reached a terminal state.
1178    "name": "A String", # The user-specified Cloud Dataflow job name.
1179        #
1180        # Only one Job with a given name may exist in a project at any
1181        # given time. If a caller attempts to create a Job with the same
1182        # name as an already-existing Job, the attempt returns the
1183        # existing Job.
1184        #
1185        # The name must match the regular expression
1186        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1187    "steps": [ # Exactly one of step or steps_location should be specified.
1188        #
1189        # The top-level steps that constitute the entire job.
1190      { # Defines a particular step within a Cloud Dataflow job.
1191          #
1192          # A job consists of multiple steps, each of which performs some
1193          # specific operation as part of the overall job.  Data is typically
1194          # passed from one step to another as part of the job.
1195          #
1196          # Here's an example of a sequence of steps which together implement a
1197          # Map-Reduce job:
1198          #
1199          #   * Read a collection of data from some source, parsing the
1200          #     collection's elements.
1201          #
1202          #   * Validate the elements.
1203          #
1204          #   * Apply a user-defined function to map each element to some value
1205          #     and extract an element-specific key value.
1206          #
1207          #   * Group elements with the same key into a single element with
1208          #     that key, transforming a multiply-keyed collection into a
1209          #     uniquely-keyed collection.
1210          #
1211          #   * Write the elements out to some data sink.
1212          #
1213          # Note that the Cloud Dataflow service may be used to run many different
1214          # types of jobs, not just Map-Reduce.
1215        "kind": "A String", # The kind of step in the Cloud Dataflow job.
1216        "properties": { # Named properties associated with the step. Each kind of
1217            # predefined step has its own required set of properties.
1218            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
1219          "a_key": "", # Properties of the object.
1220        },
1221        "name": "A String", # The name that identifies the step. This must be unique for each
1222            # step with respect to all other steps in the Cloud Dataflow job.
1223      },
1224    ],
1225    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1226        # of the job it replaced.
1227        #
1228        # When sending a `CreateJobRequest`, you can update a job by specifying it
1229        # here. The job named here is stopped, and its intermediate state is
1230        # transferred to this job.
1231    "currentState": "A String", # The current state of the job.
1232        #
1233        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1234        # specified.
1235        #
1236        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1237        # terminal state. After a job has reached a terminal state, no
1238        # further state updates may be made.
1239        #
1240        # This field may be mutated by the Cloud Dataflow service;
1241        # callers cannot mutate it.
1242    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1243        # isn't contained in the submitted job.
1244      "stages": { # A mapping from each stage to the information about that stage.
1245        "a_key": { # Contains information about how a particular
1246            # google.dataflow.v1beta3.Step will be executed.
1247          "stepName": [ # The steps associated with the execution stage.
1248              # Note that stages may have several steps, and that a given step
1249              # might be run by more than one stage.
1250            "A String",
1251          ],
1252        },
1253      },
1254    },
1255  }</pre>
1256</div>
1257
1258<div class="method">
1259    <code class="details" id="get">get(projectId, location, jobId, x__xgafv=None, view=None)</code>
1260  <pre>Gets the state of the specified Cloud Dataflow job.
1261
1262To get the state of a job, we recommend using `projects.locations.jobs.get`
1263with a [regional endpoint]
1264(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
1265`projects.jobs.get` is not recommended, as you can only get the state of
1266jobs that are running in `us-central1`.
1267
1268Args:
1269  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
1270  location: string, The [regional endpoint]
1271(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1272contains this job. (required)
1273  jobId: string, The job ID. (required)
1274  x__xgafv: string, V1 error format.
1275    Allowed values
1276      1 - v1 error format
1277      2 - v2 error format
1278  view: string, The level of information requested in response.
1279
1280Returns:
1281  An object of the form:
1282
1283    { # Defines a job to be run by the Cloud Dataflow service.
1284    "labels": { # User-defined labels for this job.
1285        #
1286        # The labels map can contain no more than 64 entries.  Entries of the labels
1287        # map are UTF8 strings that comply with the following restrictions:
1288        #
1289        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
1290        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1291        # * Both keys and values are additionally constrained to be <= 128 bytes in
1292        # size.
1293      "a_key": "A String",
1294    },
1295    "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
1296        # by the metadata values provided here. Populated for ListJobs and all GetJob
1297        # views SUMMARY and higher.
1298        # ListJob response and Job SUMMARY view.
1299      "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
1300        "versionDisplayName": "A String", # A readable string describing the version of the SDK.
1301        "version": "A String", # The version of the SDK used to run the job.
1302        "sdkSupportStatus": "A String", # The support status for this SDK version.
1303      },
1304      "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
1305        { # Metadata for a PubSub connector used by the job.
1306          "topic": "A String", # Topic accessed in the connection.
1307          "subscription": "A String", # Subscription used in the connection.
1308        },
1309      ],
1310      "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
1311        { # Metadata for a Datastore connector used by the job.
1312          "projectId": "A String", # ProjectId accessed in the connection.
1313          "namespace": "A String", # Namespace used in the connection.
1314        },
1315      ],
1316      "fileDetails": [ # Identification of a File source used in the Dataflow job.
1317        { # Metadata for a File connector used by the job.
1318          "filePattern": "A String", # File Pattern used to access files by the connector.
1319        },
1320      ],
1321      "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
1322        { # Metadata for a Spanner connector used by the job.
1323          "instanceId": "A String", # InstanceId accessed in the connection.
1324          "projectId": "A String", # ProjectId accessed in the connection.
1325          "databaseId": "A String", # DatabaseId accessed in the connection.
1326        },
1327      ],
1328      "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
1329        { # Metadata for a BigTable connector used by the job.
1330          "instanceId": "A String", # InstanceId accessed in the connection.
1331          "projectId": "A String", # ProjectId accessed in the connection.
1332          "tableId": "A String", # TableId accessed in the connection.
1333        },
1334      ],
1335      "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
1336        { # Metadata for a BigQuery connector used by the job.
1337          "projectId": "A String", # Project accessed in the connection.
1338          "dataset": "A String", # Dataset accessed in the connection.
1339          "table": "A String", # Table accessed in the connection.
1340          "query": "A String", # Query used to access data in the connection.
1341        },
1342      ],
1343    },
1344    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
1345        # A description of the user pipeline and stages through which it is executed.
1346        # Created by Cloud Dataflow service.  Only retrieved with
1347        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1348        # form.  This data is provided by the Dataflow service for ease of visualizing
1349        # the pipeline and interpreting Dataflow provided metrics.
1350      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1351        { # Description of the type, names/ids, and input/outputs for a transform.
1352          "kind": "A String", # Type of transform.
1353          "name": "A String", # User provided name for this transform instance.
1354          "inputCollectionName": [ # User names for all collection inputs to this transform.
1355            "A String",
1356          ],
1357          "displayData": [ # Transform-specific display data.
1358            { # Data provided with a pipeline or transform to provide descriptive info.
1359              "shortStrValue": "A String", # A possible additional shorter value to display.
1360                  # For example a java_class_name_value of com.mypackage.MyDoFn
1361                  # will be stored with MyDoFn as the short_str_value and
1362                  # com.mypackage.MyDoFn as the java_class_name value.
1363                  # short_str_value can be displayed and java_class_name_value
1364                  # will be displayed as a tooltip.
1365              "durationValue": "A String", # Contains value if the data is of duration type.
1366              "url": "A String", # An optional full URL.
1367              "floatValue": 3.14, # Contains value if the data is of float type.
1368              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1369                  # language namespace (i.e. python module) which defines the display data.
1370                  # This allows a dax monitoring system to specially handle the data
1371                  # and perform custom rendering.
1372              "javaClassValue": "A String", # Contains value if the data is of java class type.
1373              "label": "A String", # An optional label to display in a dax UI for the element.
1374              "boolValue": True or False, # Contains value if the data is of a boolean type.
1375              "strValue": "A String", # Contains value if the data is of string type.
1376              "key": "A String", # The key identifying the display data.
1377                  # This is intended to be used as a label for the display data
1378                  # when viewed in a dax monitoring system.
1379              "int64Value": "A String", # Contains value if the data is of int64 type.
1380              "timestampValue": "A String", # Contains value if the data is of timestamp type.
1381            },
1382          ],
1383          "outputCollectionName": [ # User  names for all collection outputs to this transform.
1384            "A String",
1385          ],
1386          "id": "A String", # SDK generated id of this transform instance.
1387        },
1388      ],
1389      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1390        { # Description of the composing transforms, names/ids, and input/outputs of a
1391            # stage of execution.  Some composing transforms and sources may have been
1392            # generated by the Dataflow service during execution planning.
1393          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1394            { # Description of an interstitial value between transforms in an execution
1395                # stage.
1396              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1397              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1398                  # source is most closely associated.
1399              "name": "A String", # Dataflow service generated name for this source.
1400            },
1401          ],
1402          "kind": "A String", # Type of tranform this stage is executing.
1403          "name": "A String", # Dataflow service generated name for this stage.
1404          "outputSource": [ # Output sources for this stage.
1405            { # Description of an input or output of an execution stage.
1406              "userName": "A String", # Human-readable name for this source; may be user or system generated.
1407              "sizeBytes": "A String", # Size of the source, if measurable.
1408              "name": "A String", # Dataflow service generated name for this source.
1409              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1410                  # source is most closely associated.
1411            },
1412          ],
1413          "inputSource": [ # Input sources for this stage.
1414            { # Description of an input or output of an execution stage.
1415              "userName": "A String", # Human-readable name for this source; may be user or system generated.
1416              "sizeBytes": "A String", # Size of the source, if measurable.
1417              "name": "A String", # Dataflow service generated name for this source.
1418              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1419                  # source is most closely associated.
1420            },
1421          ],
1422          "componentTransform": [ # Transforms that comprise this execution stage.
1423            { # Description of a transform executed as part of an execution stage.
1424              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1425              "originalTransform": "A String", # User name for the original user transform with which this transform is
1426                  # most closely associated.
1427              "name": "A String", # Dataflow service generated name for this source.
1428            },
1429          ],
1430          "id": "A String", # Dataflow service generated id for this stage.
1431        },
1432      ],
1433      "displayData": [ # Pipeline level display data.
1434        { # Data provided with a pipeline or transform to provide descriptive info.
1435          "shortStrValue": "A String", # A possible additional shorter value to display.
1436              # For example a java_class_name_value of com.mypackage.MyDoFn
1437              # will be stored with MyDoFn as the short_str_value and
1438              # com.mypackage.MyDoFn as the java_class_name value.
1439              # short_str_value can be displayed and java_class_name_value
1440              # will be displayed as a tooltip.
1441          "durationValue": "A String", # Contains value if the data is of duration type.
1442          "url": "A String", # An optional full URL.
1443          "floatValue": 3.14, # Contains value if the data is of float type.
1444          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1445              # language namespace (i.e. python module) which defines the display data.
1446              # This allows a dax monitoring system to specially handle the data
1447              # and perform custom rendering.
1448          "javaClassValue": "A String", # Contains value if the data is of java class type.
1449          "label": "A String", # An optional label to display in a dax UI for the element.
1450          "boolValue": True or False, # Contains value if the data is of a boolean type.
1451          "strValue": "A String", # Contains value if the data is of string type.
1452          "key": "A String", # The key identifying the display data.
1453              # This is intended to be used as a label for the display data
1454              # when viewed in a dax monitoring system.
1455          "int64Value": "A String", # Contains value if the data is of int64 type.
1456          "timestampValue": "A String", # Contains value if the data is of timestamp type.
1457        },
1458      ],
1459    },
1460    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1461        # callers cannot mutate it.
1462      { # A message describing the state of a particular execution stage.
1463        "executionStageName": "A String", # The name of the execution stage.
1464        "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
1465        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1466      },
1467    ],
1468    "id": "A String", # The unique ID of this job.
1469        #
1470        # This field is set by the Cloud Dataflow service when the Job is
1471        # created, and is immutable for the life of the job.
1472    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1473        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1474    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1475    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1476        # corresponding name prefixes of the new job.
1477      "a_key": "A String",
1478    },
1479    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1480      "version": { # A structure describing which components and their versions of the service
1481          # are required in order to run the job.
1482        "a_key": "", # Properties of the object.
1483      },
1484      "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
1485      "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
1486          # at rest, AKA a Customer Managed Encryption Key (CMEK).
1487          #
1488          # Format:
1489          #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
1490      "internalExperiments": { # Experimental settings.
1491        "a_key": "", # Properties of the object. Contains field @type with type URL.
1492      },
1493      "dataset": "A String", # The dataset for the current project where various workflow
1494          # related tables are stored.
1495          #
1496          # The supported resource type is:
1497          #
1498          # Google BigQuery:
1499          #   bigquery.googleapis.com/{dataset}
1500      "experiments": [ # The list of experiments to enable.
1501        "A String",
1502      ],
1503      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1504      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1505          # options are passed through the service and are used to recreate the
1506          # SDK pipeline options on the worker in a language agnostic and platform
1507          # independent way.
1508        "a_key": "", # Properties of the object.
1509      },
1510      "userAgent": { # A description of the process that generated the request.
1511        "a_key": "", # Properties of the object.
1512      },
1513      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
1514          # unspecified, the service will attempt to choose a reasonable
1515          # default.  This should be in the form of the API service name,
1516          # e.g. "compute.googleapis.com".
1517      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1518          # specified in order for the job to have workers.
1519        { # Describes one particular pool of Cloud Dataflow workers to be
1520            # instantiated by the Cloud Dataflow service in order to perform the
1521            # computations required by a job.  Note that a workflow job may use
1522            # multiple pools, in order to match the various computational
1523            # requirements of the various stages of the job.
1524          "diskSourceImage": "A String", # Fully qualified source image for disks.
1525          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1526              # using the standard Dataflow task runner.  Users should ignore
1527              # this field.
1528            "workflowFileName": "A String", # The file to store the workflow in.
1529            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
1530                # will not be uploaded.
1531                #
1532                # The supported resource type is:
1533                #
1534                # Google Cloud Storage:
1535                #   storage.googleapis.com/{bucket}/{object}
1536                #   bucket.storage.googleapis.com/{object}
1537            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1538            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1539              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1540              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1541                  # "shuffle/v1beta1".
1542              "workerId": "A String", # The ID of the worker running this pipeline.
1543              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1544                  #
1545                  # When workers access Google Cloud APIs, they logically do so via
1546                  # relative URLs.  If this field is specified, it supplies the base
1547                  # URL to use for resolving these relative URLs.  The normative
1548                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1549                  # Locators".
1550                  #
1551                  # If not specified, the default value is "http://www.googleapis.com/"
1552              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1553                  # "dataflow/v1b3/projects".
1554              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1555                  # storage.
1556                  #
1557                  # The supported resource type is:
1558                  #
1559                  # Google Cloud Storage:
1560                  #
1561                  #   storage.googleapis.com/{bucket}/{object}
1562                  #   bucket.storage.googleapis.com/{object}
1563            },
1564            "vmId": "A String", # The ID string of the VM.
1565            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1566            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1567            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1568                # access the Cloud Dataflow API.
1569              "A String",
1570            ],
1571            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1572                # taskrunner; e.g. "root".
1573            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1574                #
1575                # When workers access Google Cloud APIs, they logically do so via
1576                # relative URLs.  If this field is specified, it supplies the base
1577                # URL to use for resolving these relative URLs.  The normative
1578                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1579                # Locators".
1580                #
1581                # If not specified, the default value is "http://www.googleapis.com/"
1582            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1583                # taskrunner; e.g. "wheel".
1584            "languageHint": "A String", # The suggested backend language.
1585            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1586                # console.
1587            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1588            "logDir": "A String", # The directory on the VM to store logs.
1589            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1590            "harnessCommand": "A String", # The command to launch the worker harness.
1591            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1592                # temporary storage.
1593                #
1594                # The supported resource type is:
1595                #
1596                # Google Cloud Storage:
1597                #   storage.googleapis.com/{bucket}/{object}
1598                #   bucket.storage.googleapis.com/{object}
1599            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1600          },
1601          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1602              # are supported.
1603          "packages": [ # Packages to be installed on workers.
1604            { # The packages that must be installed in order for a worker to run the
1605                # steps of the Cloud Dataflow job that will be assigned to its worker
1606                # pool.
1607                #
1608                # This is the mechanism by which the Cloud Dataflow SDK causes code to
1609                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1610                # might use this to install jars containing the user's code and all of the
1611                # various dependencies (libraries, data files, etc.) required in order
1612                # for that code to run.
1613              "location": "A String", # The resource to read the package from. The supported resource type is:
1614                  #
1615                  # Google Cloud Storage:
1616                  #
1617                  #   storage.googleapis.com/{bucket}
1618                  #   bucket.storage.googleapis.com/
1619              "name": "A String", # The name of the package.
1620            },
1621          ],
1622          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
1623              # service will attempt to choose a reasonable default.
1624          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
1625              # the service will use the network "default".
1626          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
1627              # will attempt to choose a reasonable default.
1628          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
1629              # attempt to choose a reasonable default.
1630          "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
1631              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1632              # `TEARDOWN_NEVER`.
1633              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1634              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1635              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1636              # down.
1637              #
1638              # If the workers are not torn down by the service, they will
1639              # continue to run and use Google Compute Engine VM resources in the
1640              # user's project until they are explicitly terminated by the user.
1641              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1642              # policy except for small, manually supervised test jobs.
1643              #
1644              # If unknown or unspecified, the service will attempt to choose a reasonable
1645              # default.
1646          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1647              # Compute Engine API.
1648          "ipConfiguration": "A String", # Configuration for VM IPs.
1649          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1650              # service will choose a number of threads (according to the number of cores
1651              # on the selected machine type for batch, or 1 by convention for streaming).
1652          "poolArgs": { # Extra arguments for this worker pool.
1653            "a_key": "", # Properties of the object. Contains field @type with type URL.
1654          },
1655          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1656              # execute the job.  If zero or unspecified, the service will
1657              # attempt to choose a reasonable default.
1658          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1659              # harness, residing in Google Container Registry.
1660          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
1661              # the form "regions/REGION/subnetworks/SUBNETWORK".
1662          "dataDisks": [ # Data disks that are used by a VM in this workflow.
1663            { # Describes the data disk used by a workflow job.
1664              "mountPoint": "A String", # Directory in a VM where disk is mounted.
1665              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
1666                  # attempt to choose a reasonable default.
1667              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
1668                  # must be a disk type appropriate to the project and zone in which
1669                  # the workers will run.  If unknown or unspecified, the service
1670                  # will attempt to choose a reasonable default.
1671                  #
1672                  # For example, the standard persistent disk type is a resource name
1673                  # typically ending in "pd-standard".  If SSD persistent disks are
1674                  # available, the resource name typically ends with "pd-ssd".  The
1675                  # actual valid values are defined the Google Compute Engine API,
1676                  # not by the Cloud Dataflow API; consult the Google Compute Engine
1677                  # documentation for more information about determining the set of
1678                  # available disk types for a particular project and zone.
1679                  #
1680                  # Google Compute Engine Disk types are local to a particular
1681                  # project in a particular zone, and so the resource name will
1682                  # typically look something like this:
1683                  #
1684                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1685            },
1686          ],
1687          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1688            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1689            "algorithm": "A String", # The algorithm to use for autoscaling.
1690          },
1691          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
1692              # select a default set of packages which are useful to worker
1693              # harnesses written in a particular language.
1694          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
1695              # attempt to choose a reasonable default.
1696          "metadata": { # Metadata to set on the Google Compute Engine VMs.
1697            "a_key": "A String",
1698          },
1699        },
1700      ],
1701      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1702          # storage.  The system will append the suffix "/temp-{JOBNAME} to
1703          # this resource prefix, where {JOBNAME} is the value of the
1704          # job_name field.  The resulting bucket and object prefix is used
1705          # as the prefix of the resources used to store temporary data
1706          # needed during the job execution.  NOTE: This will override the
1707          # value in taskrunner_settings.
1708          # The supported resource type is:
1709          #
1710          # Google Cloud Storage:
1711          #
1712          #   storage.googleapis.com/{bucket}/{object}
1713          #   bucket.storage.googleapis.com/{object}
1714    },
1715    "location": "A String", # The [regional endpoint]
1716        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1717        # contains this job.
1718    "tempFiles": [ # A set of files the system should be aware of that are used
1719        # for temporary storage. These temporary files will be
1720        # removed on job completion.
1721        # No duplicates are allowed.
1722        # No file patterns are supported.
1723        #
1724        # The supported files are:
1725        #
1726        # Google Cloud Storage:
1727        #
1728        #    storage.googleapis.com/{bucket}/{object}
1729        #    bucket.storage.googleapis.com/{object}
1730      "A String",
1731    ],
1732    "type": "A String", # The type of Cloud Dataflow job.
1733    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1734        # If this field is set, the service will ensure its uniqueness.
1735        # The request to create a job will fail if the service has knowledge of a
1736        # previously submitted job with the same client's ID and job name.
1737        # The caller may use this field to ensure idempotence of job
1738        # creation across retried attempts to create a job.
1739        # By default, the field is empty and, in that case, the service ignores it.
1740    "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
1741        # snapshot.
1742    "stepsLocation": "A String", # The GCS location where the steps are stored.
1743    "currentStateTime": "A String", # The timestamp associated with the current state.
1744    "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
1745        # Flexible resource scheduling jobs are started with some delay after job
1746        # creation, so start_time is unset before start and is updated when the
1747        # job is started by the Cloud Dataflow service. For other jobs, start_time
1748        # always equals to create_time and is immutable and set by the Cloud Dataflow
1749        # service.
1750    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1751        # Cloud Dataflow service.
1752    "requestedState": "A String", # The job's requested state.
1753        #
1754        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1755        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
1756        # also be used to directly set a job's requested state to
1757        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1758        # job if it has not already reached a terminal state.
1759    "name": "A String", # The user-specified Cloud Dataflow job name.
1760        #
1761        # Only one Job with a given name may exist in a project at any
1762        # given time. If a caller attempts to create a Job with the same
1763        # name as an already-existing Job, the attempt returns the
1764        # existing Job.
1765        #
1766        # The name must match the regular expression
1767        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1768    "steps": [ # Exactly one of step or steps_location should be specified.
1769        #
1770        # The top-level steps that constitute the entire job.
1771      { # Defines a particular step within a Cloud Dataflow job.
1772          #
1773          # A job consists of multiple steps, each of which performs some
1774          # specific operation as part of the overall job.  Data is typically
1775          # passed from one step to another as part of the job.
1776          #
1777          # Here's an example of a sequence of steps which together implement a
1778          # Map-Reduce job:
1779          #
1780          #   * Read a collection of data from some source, parsing the
1781          #     collection's elements.
1782          #
1783          #   * Validate the elements.
1784          #
1785          #   * Apply a user-defined function to map each element to some value
1786          #     and extract an element-specific key value.
1787          #
1788          #   * Group elements with the same key into a single element with
1789          #     that key, transforming a multiply-keyed collection into a
1790          #     uniquely-keyed collection.
1791          #
1792          #   * Write the elements out to some data sink.
1793          #
1794          # Note that the Cloud Dataflow service may be used to run many different
1795          # types of jobs, not just Map-Reduce.
1796        "kind": "A String", # The kind of step in the Cloud Dataflow job.
1797        "properties": { # Named properties associated with the step. Each kind of
1798            # predefined step has its own required set of properties.
1799            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
1800          "a_key": "", # Properties of the object.
1801        },
1802        "name": "A String", # The name that identifies the step. This must be unique for each
1803            # step with respect to all other steps in the Cloud Dataflow job.
1804      },
1805    ],
1806    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1807        # of the job it replaced.
1808        #
1809        # When sending a `CreateJobRequest`, you can update a job by specifying it
1810        # here. The job named here is stopped, and its intermediate state is
1811        # transferred to this job.
1812    "currentState": "A String", # The current state of the job.
1813        #
1814        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1815        # specified.
1816        #
1817        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1818        # terminal state. After a job has reached a terminal state, no
1819        # further state updates may be made.
1820        #
1821        # This field may be mutated by the Cloud Dataflow service;
1822        # callers cannot mutate it.
1823    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1824        # isn't contained in the submitted job.
1825      "stages": { # A mapping from each stage to the information about that stage.
1826        "a_key": { # Contains information about how a particular
1827            # google.dataflow.v1beta3.Step will be executed.
1828          "stepName": [ # The steps associated with the execution stage.
1829              # Note that stages may have several steps, and that a given step
1830              # might be run by more than one stage.
1831            "A String",
1832          ],
1833        },
1834      },
1835    },
1836  }</pre>
1837</div>
1838
1839<div class="method">
1840    <code class="details" id="getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</code>
1841  <pre>Request the job status.
1842
1843To request the status of a job, we recommend using
1844`projects.locations.jobs.getMetrics` with a [regional endpoint]
1845(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
1846`projects.jobs.getMetrics` is not recommended, as you can only request the
1847status of jobs that are running in `us-central1`.
1848
1849Args:
1850  projectId: string, A project id. (required)
1851  location: string, The [regional endpoint]
1852(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1853contains the job specified by job_id. (required)
1854  jobId: string, The job to get messages for. (required)
1855  startTime: string, Return only metric data that has changed since this time.
1856Default is to return all information about all metrics for the job.
1857  x__xgafv: string, V1 error format.
1858    Allowed values
1859      1 - v1 error format
1860      2 - v2 error format
1861
1862Returns:
1863  An object of the form:
1864
1865    { # JobMetrics contains a collection of metrics describing the detailed progress
1866      # of a Dataflow job. Metrics correspond to user-defined and system-defined
1867      # metrics in the job.
1868      #
1869      # This resource captures only the most recent values of each metric;
1870      # time-series data can be queried for them (under the same metric names)
1871      # from Cloud Monitoring.
1872    "metrics": [ # All metrics for this job.
1873      { # Describes the state of a metric.
1874        "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1875            # This holds the count of the aggregated values and is used in combination
1876            # with mean_sum above to obtain the actual mean aggregate value.
1877            # The only possible value type is Long.
1878        "kind": "A String", # Metric aggregation kind.  The possible metric aggregation kinds are
1879            # "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution".
1880            # The specified aggregation kind is case-insensitive.
1881            #
1882            # If omitted, this is not an aggregated value but instead
1883            # a single metric sample value.
1884        "set": "", # Worker-computed aggregate value for the "Set" aggregation kind.  The only
1885            # possible value type is a list of Values whose type can be Long, Double,
1886            # or String, according to the metric's type.  All Values in the list must
1887            # be of the same type.
1888        "name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
1889            # metric.
1890          "origin": "A String", # Origin (namespace) of metric name. May be blank for user-define metrics;
1891              # will be "dataflow" for metrics defined by the Dataflow service or SDK.
1892          "name": "A String", # Worker-defined metric name.
1893          "context": { # Zero or more labeled fields which identify the part of the job this
1894              # metric is associated with, such as the name of a step or collection.
1895              #
1896              # For example, built-in counters associated with steps will have
1897              # context['step'] = <step-name>. Counters associated with PCollections
1898              # in the SDK will have context['pcollection'] = <pcollection-name>.
1899            "a_key": "A String",
1900          },
1901        },
1902        "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1903            # This holds the sum of the aggregated values and is used in combination
1904            # with mean_count below to obtain the actual mean aggregate value.
1905            # The only possible value types are Long and Double.
1906        "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
1907            # value accumulated since the worker started working on this WorkItem.
1908            # By default this is false, indicating that this metric is reported
1909            # as a delta that is not associated with any WorkItem.
1910        "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
1911            # reporting work progress; it will be filled in responses from the
1912            # metrics API.
1913        "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
1914            # "And", and "Or".  The possible value types are Long, Double, and Boolean.
1915        "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
1916            # service.
1917        "gauge": "", # A struct value describing properties of a Gauge.
1918            # Metrics of gauge type show the value of a metric across time, and is
1919            # aggregated based on the newest value.
1920        "distribution": "", # A struct value describing properties of a distribution of numeric values.
1921      },
1922    ],
1923    "metricTime": "A String", # Timestamp as of which metric values are current.
1924  }</pre>
1925</div>
1926
1927<div class="method">
1928    <code class="details" id="list">list(projectId, location, pageSize=None, pageToken=None, x__xgafv=None, filter=None, view=None)</code>
1929  <pre>List the jobs of a project.
1930
1931To list the jobs of a project in a region, we recommend using
1932`projects.locations.jobs.get` with a [regional endpoint]
1933(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). To
1934list the all jobs across all regions, use `projects.jobs.aggregated`. Using
1935`projects.jobs.list` is not recommended, as you can only get the list of
1936jobs that are running in `us-central1`.
1937
1938Args:
1939  projectId: string, The project which owns the jobs. (required)
1940  location: string, The [regional endpoint]
1941(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1942contains this job. (required)
1943  pageSize: integer, If there are many jobs, limit response to at most this many.
1944The actual number of jobs returned will be the lesser of max_responses
1945and an unspecified server-defined limit.
1946  pageToken: string, Set this to the 'next_page_token' field of a previous response
1947to request additional results in a long list.
1948  x__xgafv: string, V1 error format.
1949    Allowed values
1950      1 - v1 error format
1951      2 - v2 error format
1952  filter: string, The kind of filter to use.
1953  view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.
1954
1955Returns:
1956  An object of the form:
1957
1958    { # Response to a request to list Cloud Dataflow jobs.  This may be a partial
1959      # response, depending on the page size in the ListJobsRequest.
1960    "nextPageToken": "A String", # Set if there may be more results than fit in this response.
1961    "failedLocation": [ # Zero or more messages describing the [regional endpoints]
1962        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1963        # failed to respond.
1964      { # Indicates which [regional endpoint]
1965          # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) failed
1966          # to respond to a request for data.
1967        "name": "A String", # The name of the [regional endpoint]
1968            # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
1969            # failed to respond.
1970      },
1971    ],
1972    "jobs": [ # A subset of the requested job information.
1973      { # Defines a job to be run by the Cloud Dataflow service.
1974        "labels": { # User-defined labels for this job.
1975            #
1976            # The labels map can contain no more than 64 entries.  Entries of the labels
1977            # map are UTF8 strings that comply with the following restrictions:
1978            #
1979            # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
1980            # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1981            # * Both keys and values are additionally constrained to be <= 128 bytes in
1982            # size.
1983          "a_key": "A String",
1984        },
1985        "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
1986            # by the metadata values provided here. Populated for ListJobs and all GetJob
1987            # views SUMMARY and higher.
1988            # ListJob response and Job SUMMARY view.
1989          "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
1990            "versionDisplayName": "A String", # A readable string describing the version of the SDK.
1991            "version": "A String", # The version of the SDK used to run the job.
1992            "sdkSupportStatus": "A String", # The support status for this SDK version.
1993          },
1994          "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
1995            { # Metadata for a PubSub connector used by the job.
1996              "topic": "A String", # Topic accessed in the connection.
1997              "subscription": "A String", # Subscription used in the connection.
1998            },
1999          ],
2000          "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
2001            { # Metadata for a Datastore connector used by the job.
2002              "projectId": "A String", # ProjectId accessed in the connection.
2003              "namespace": "A String", # Namespace used in the connection.
2004            },
2005          ],
2006          "fileDetails": [ # Identification of a File source used in the Dataflow job.
2007            { # Metadata for a File connector used by the job.
2008              "filePattern": "A String", # File Pattern used to access files by the connector.
2009            },
2010          ],
2011          "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
2012            { # Metadata for a Spanner connector used by the job.
2013              "instanceId": "A String", # InstanceId accessed in the connection.
2014              "projectId": "A String", # ProjectId accessed in the connection.
2015              "databaseId": "A String", # DatabaseId accessed in the connection.
2016            },
2017          ],
2018          "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
2019            { # Metadata for a BigTable connector used by the job.
2020              "instanceId": "A String", # InstanceId accessed in the connection.
2021              "projectId": "A String", # ProjectId accessed in the connection.
2022              "tableId": "A String", # TableId accessed in the connection.
2023            },
2024          ],
2025          "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
2026            { # Metadata for a BigQuery connector used by the job.
2027              "projectId": "A String", # Project accessed in the connection.
2028              "dataset": "A String", # Dataset accessed in the connection.
2029              "table": "A String", # Table accessed in the connection.
2030              "query": "A String", # Query used to access data in the connection.
2031            },
2032          ],
2033        },
2034        "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
2035            # A description of the user pipeline and stages through which it is executed.
2036            # Created by Cloud Dataflow service.  Only retrieved with
2037            # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2038            # form.  This data is provided by the Dataflow service for ease of visualizing
2039            # the pipeline and interpreting Dataflow provided metrics.
2040          "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2041            { # Description of the type, names/ids, and input/outputs for a transform.
2042              "kind": "A String", # Type of transform.
2043              "name": "A String", # User provided name for this transform instance.
2044              "inputCollectionName": [ # User names for all collection inputs to this transform.
2045                "A String",
2046              ],
2047              "displayData": [ # Transform-specific display data.
2048                { # Data provided with a pipeline or transform to provide descriptive info.
2049                  "shortStrValue": "A String", # A possible additional shorter value to display.
2050                      # For example a java_class_name_value of com.mypackage.MyDoFn
2051                      # will be stored with MyDoFn as the short_str_value and
2052                      # com.mypackage.MyDoFn as the java_class_name value.
2053                      # short_str_value can be displayed and java_class_name_value
2054                      # will be displayed as a tooltip.
2055                  "durationValue": "A String", # Contains value if the data is of duration type.
2056                  "url": "A String", # An optional full URL.
2057                  "floatValue": 3.14, # Contains value if the data is of float type.
2058                  "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2059                      # language namespace (i.e. python module) which defines the display data.
2060                      # This allows a dax monitoring system to specially handle the data
2061                      # and perform custom rendering.
2062                  "javaClassValue": "A String", # Contains value if the data is of java class type.
2063                  "label": "A String", # An optional label to display in a dax UI for the element.
2064                  "boolValue": True or False, # Contains value if the data is of a boolean type.
2065                  "strValue": "A String", # Contains value if the data is of string type.
2066                  "key": "A String", # The key identifying the display data.
2067                      # This is intended to be used as a label for the display data
2068                      # when viewed in a dax monitoring system.
2069                  "int64Value": "A String", # Contains value if the data is of int64 type.
2070                  "timestampValue": "A String", # Contains value if the data is of timestamp type.
2071                },
2072              ],
2073              "outputCollectionName": [ # User  names for all collection outputs to this transform.
2074                "A String",
2075              ],
2076              "id": "A String", # SDK generated id of this transform instance.
2077            },
2078          ],
2079          "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2080            { # Description of the composing transforms, names/ids, and input/outputs of a
2081                # stage of execution.  Some composing transforms and sources may have been
2082                # generated by the Dataflow service during execution planning.
2083              "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2084                { # Description of an interstitial value between transforms in an execution
2085                    # stage.
2086                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2087                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2088                      # source is most closely associated.
2089                  "name": "A String", # Dataflow service generated name for this source.
2090                },
2091              ],
2092              "kind": "A String", # Type of tranform this stage is executing.
2093              "name": "A String", # Dataflow service generated name for this stage.
2094              "outputSource": [ # Output sources for this stage.
2095                { # Description of an input or output of an execution stage.
2096                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
2097                  "sizeBytes": "A String", # Size of the source, if measurable.
2098                  "name": "A String", # Dataflow service generated name for this source.
2099                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2100                      # source is most closely associated.
2101                },
2102              ],
2103              "inputSource": [ # Input sources for this stage.
2104                { # Description of an input or output of an execution stage.
2105                  "userName": "A String", # Human-readable name for this source; may be user or system generated.
2106                  "sizeBytes": "A String", # Size of the source, if measurable.
2107                  "name": "A String", # Dataflow service generated name for this source.
2108                  "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2109                      # source is most closely associated.
2110                },
2111              ],
2112              "componentTransform": [ # Transforms that comprise this execution stage.
2113                { # Description of a transform executed as part of an execution stage.
2114                  "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2115                  "originalTransform": "A String", # User name for the original user transform with which this transform is
2116                      # most closely associated.
2117                  "name": "A String", # Dataflow service generated name for this source.
2118                },
2119              ],
2120              "id": "A String", # Dataflow service generated id for this stage.
2121            },
2122          ],
2123          "displayData": [ # Pipeline level display data.
2124            { # Data provided with a pipeline or transform to provide descriptive info.
2125              "shortStrValue": "A String", # A possible additional shorter value to display.
2126                  # For example a java_class_name_value of com.mypackage.MyDoFn
2127                  # will be stored with MyDoFn as the short_str_value and
2128                  # com.mypackage.MyDoFn as the java_class_name value.
2129                  # short_str_value can be displayed and java_class_name_value
2130                  # will be displayed as a tooltip.
2131              "durationValue": "A String", # Contains value if the data is of duration type.
2132              "url": "A String", # An optional full URL.
2133              "floatValue": 3.14, # Contains value if the data is of float type.
2134              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2135                  # language namespace (i.e. python module) which defines the display data.
2136                  # This allows a dax monitoring system to specially handle the data
2137                  # and perform custom rendering.
2138              "javaClassValue": "A String", # Contains value if the data is of java class type.
2139              "label": "A String", # An optional label to display in a dax UI for the element.
2140              "boolValue": True or False, # Contains value if the data is of a boolean type.
2141              "strValue": "A String", # Contains value if the data is of string type.
2142              "key": "A String", # The key identifying the display data.
2143                  # This is intended to be used as a label for the display data
2144                  # when viewed in a dax monitoring system.
2145              "int64Value": "A String", # Contains value if the data is of int64 type.
2146              "timestampValue": "A String", # Contains value if the data is of timestamp type.
2147            },
2148          ],
2149        },
2150        "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
2151            # callers cannot mutate it.
2152          { # A message describing the state of a particular execution stage.
2153            "executionStageName": "A String", # The name of the execution stage.
2154            "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
2155            "currentStateTime": "A String", # The time at which the stage transitioned to this state.
2156          },
2157        ],
2158        "id": "A String", # The unique ID of this job.
2159            #
2160            # This field is set by the Cloud Dataflow service when the Job is
2161            # created, and is immutable for the life of the job.
2162        "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2163            # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2164        "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2165        "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2166            # corresponding name prefixes of the new job.
2167          "a_key": "A String",
2168        },
2169        "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2170          "version": { # A structure describing which components and their versions of the service
2171              # are required in order to run the job.
2172            "a_key": "", # Properties of the object.
2173          },
2174          "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
2175          "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
2176              # at rest, AKA a Customer Managed Encryption Key (CMEK).
2177              #
2178              # Format:
2179              #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
2180          "internalExperiments": { # Experimental settings.
2181            "a_key": "", # Properties of the object. Contains field @type with type URL.
2182          },
2183          "dataset": "A String", # The dataset for the current project where various workflow
2184              # related tables are stored.
2185              #
2186              # The supported resource type is:
2187              #
2188              # Google BigQuery:
2189              #   bigquery.googleapis.com/{dataset}
2190          "experiments": [ # The list of experiments to enable.
2191            "A String",
2192          ],
2193          "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
2194          "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2195              # options are passed through the service and are used to recreate the
2196              # SDK pipeline options on the worker in a language agnostic and platform
2197              # independent way.
2198            "a_key": "", # Properties of the object.
2199          },
2200          "userAgent": { # A description of the process that generated the request.
2201            "a_key": "", # Properties of the object.
2202          },
2203          "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
2204              # unspecified, the service will attempt to choose a reasonable
2205              # default.  This should be in the form of the API service name,
2206              # e.g. "compute.googleapis.com".
2207          "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2208              # specified in order for the job to have workers.
2209            { # Describes one particular pool of Cloud Dataflow workers to be
2210                # instantiated by the Cloud Dataflow service in order to perform the
2211                # computations required by a job.  Note that a workflow job may use
2212                # multiple pools, in order to match the various computational
2213                # requirements of the various stages of the job.
2214              "diskSourceImage": "A String", # Fully qualified source image for disks.
2215              "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2216                  # using the standard Dataflow task runner.  Users should ignore
2217                  # this field.
2218                "workflowFileName": "A String", # The file to store the workflow in.
2219                "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
2220                    # will not be uploaded.
2221                    #
2222                    # The supported resource type is:
2223                    #
2224                    # Google Cloud Storage:
2225                    #   storage.googleapis.com/{bucket}/{object}
2226                    #   bucket.storage.googleapis.com/{object}
2227                "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2228                "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2229                  "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2230                  "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2231                      # "shuffle/v1beta1".
2232                  "workerId": "A String", # The ID of the worker running this pipeline.
2233                  "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2234                      #
2235                      # When workers access Google Cloud APIs, they logically do so via
2236                      # relative URLs.  If this field is specified, it supplies the base
2237                      # URL to use for resolving these relative URLs.  The normative
2238                      # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2239                      # Locators".
2240                      #
2241                      # If not specified, the default value is "http://www.googleapis.com/"
2242                  "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2243                      # "dataflow/v1b3/projects".
2244                  "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2245                      # storage.
2246                      #
2247                      # The supported resource type is:
2248                      #
2249                      # Google Cloud Storage:
2250                      #
2251                      #   storage.googleapis.com/{bucket}/{object}
2252                      #   bucket.storage.googleapis.com/{object}
2253                },
2254                "vmId": "A String", # The ID string of the VM.
2255                "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2256                "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2257                "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2258                    # access the Cloud Dataflow API.
2259                  "A String",
2260                ],
2261                "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2262                    # taskrunner; e.g. "root".
2263                "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2264                    #
2265                    # When workers access Google Cloud APIs, they logically do so via
2266                    # relative URLs.  If this field is specified, it supplies the base
2267                    # URL to use for resolving these relative URLs.  The normative
2268                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2269                    # Locators".
2270                    #
2271                    # If not specified, the default value is "http://www.googleapis.com/"
2272                "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2273                    # taskrunner; e.g. "wheel".
2274                "languageHint": "A String", # The suggested backend language.
2275                "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2276                    # console.
2277                "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2278                "logDir": "A String", # The directory on the VM to store logs.
2279                "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
2280                "harnessCommand": "A String", # The command to launch the worker harness.
2281                "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2282                    # temporary storage.
2283                    #
2284                    # The supported resource type is:
2285                    #
2286                    # Google Cloud Storage:
2287                    #   storage.googleapis.com/{bucket}/{object}
2288                    #   bucket.storage.googleapis.com/{object}
2289                "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2290              },
2291              "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2292                  # are supported.
2293              "packages": [ # Packages to be installed on workers.
2294                { # The packages that must be installed in order for a worker to run the
2295                    # steps of the Cloud Dataflow job that will be assigned to its worker
2296                    # pool.
2297                    #
2298                    # This is the mechanism by which the Cloud Dataflow SDK causes code to
2299                    # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2300                    # might use this to install jars containing the user's code and all of the
2301                    # various dependencies (libraries, data files, etc.) required in order
2302                    # for that code to run.
2303                  "location": "A String", # The resource to read the package from. The supported resource type is:
2304                      #
2305                      # Google Cloud Storage:
2306                      #
2307                      #   storage.googleapis.com/{bucket}
2308                      #   bucket.storage.googleapis.com/
2309                  "name": "A String", # The name of the package.
2310                },
2311              ],
2312              "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
2313                  # service will attempt to choose a reasonable default.
2314              "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
2315                  # the service will use the network "default".
2316              "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
2317                  # will attempt to choose a reasonable default.
2318              "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
2319                  # attempt to choose a reasonable default.
2320              "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
2321                  # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2322                  # `TEARDOWN_NEVER`.
2323                  # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2324                  # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2325                  # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2326                  # down.
2327                  #
2328                  # If the workers are not torn down by the service, they will
2329                  # continue to run and use Google Compute Engine VM resources in the
2330                  # user's project until they are explicitly terminated by the user.
2331                  # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2332                  # policy except for small, manually supervised test jobs.
2333                  #
2334                  # If unknown or unspecified, the service will attempt to choose a reasonable
2335                  # default.
2336              "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2337                  # Compute Engine API.
2338              "ipConfiguration": "A String", # Configuration for VM IPs.
2339              "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2340                  # service will choose a number of threads (according to the number of cores
2341                  # on the selected machine type for batch, or 1 by convention for streaming).
2342              "poolArgs": { # Extra arguments for this worker pool.
2343                "a_key": "", # Properties of the object. Contains field @type with type URL.
2344              },
2345              "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2346                  # execute the job.  If zero or unspecified, the service will
2347                  # attempt to choose a reasonable default.
2348              "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2349                  # harness, residing in Google Container Registry.
2350              "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
2351                  # the form "regions/REGION/subnetworks/SUBNETWORK".
2352              "dataDisks": [ # Data disks that are used by a VM in this workflow.
2353                { # Describes the data disk used by a workflow job.
2354                  "mountPoint": "A String", # Directory in a VM where disk is mounted.
2355                  "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
2356                      # attempt to choose a reasonable default.
2357                  "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
2358                      # must be a disk type appropriate to the project and zone in which
2359                      # the workers will run.  If unknown or unspecified, the service
2360                      # will attempt to choose a reasonable default.
2361                      #
2362                      # For example, the standard persistent disk type is a resource name
2363                      # typically ending in "pd-standard".  If SSD persistent disks are
2364                      # available, the resource name typically ends with "pd-ssd".  The
2365                      # actual valid values are defined the Google Compute Engine API,
2366                      # not by the Cloud Dataflow API; consult the Google Compute Engine
2367                      # documentation for more information about determining the set of
2368                      # available disk types for a particular project and zone.
2369                      #
2370                      # Google Compute Engine Disk types are local to a particular
2371                      # project in a particular zone, and so the resource name will
2372                      # typically look something like this:
2373                      #
2374                      # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
2375                },
2376              ],
2377              "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
2378                "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
2379                "algorithm": "A String", # The algorithm to use for autoscaling.
2380              },
2381              "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
2382                  # select a default set of packages which are useful to worker
2383                  # harnesses written in a particular language.
2384              "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
2385                  # attempt to choose a reasonable default.
2386              "metadata": { # Metadata to set on the Google Compute Engine VMs.
2387                "a_key": "A String",
2388              },
2389            },
2390          ],
2391          "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2392              # storage.  The system will append the suffix "/temp-{JOBNAME} to
2393              # this resource prefix, where {JOBNAME} is the value of the
2394              # job_name field.  The resulting bucket and object prefix is used
2395              # as the prefix of the resources used to store temporary data
2396              # needed during the job execution.  NOTE: This will override the
2397              # value in taskrunner_settings.
2398              # The supported resource type is:
2399              #
2400              # Google Cloud Storage:
2401              #
2402              #   storage.googleapis.com/{bucket}/{object}
2403              #   bucket.storage.googleapis.com/{object}
2404        },
2405        "location": "A String", # The [regional endpoint]
2406            # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
2407            # contains this job.
2408        "tempFiles": [ # A set of files the system should be aware of that are used
2409            # for temporary storage. These temporary files will be
2410            # removed on job completion.
2411            # No duplicates are allowed.
2412            # No file patterns are supported.
2413            #
2414            # The supported files are:
2415            #
2416            # Google Cloud Storage:
2417            #
2418            #    storage.googleapis.com/{bucket}/{object}
2419            #    bucket.storage.googleapis.com/{object}
2420          "A String",
2421        ],
2422        "type": "A String", # The type of Cloud Dataflow job.
2423        "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2424            # If this field is set, the service will ensure its uniqueness.
2425            # The request to create a job will fail if the service has knowledge of a
2426            # previously submitted job with the same client's ID and job name.
2427            # The caller may use this field to ensure idempotence of job
2428            # creation across retried attempts to create a job.
2429            # By default, the field is empty and, in that case, the service ignores it.
2430        "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
2431            # snapshot.
2432        "stepsLocation": "A String", # The GCS location where the steps are stored.
2433        "currentStateTime": "A String", # The timestamp associated with the current state.
2434        "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
2435            # Flexible resource scheduling jobs are started with some delay after job
2436            # creation, so start_time is unset before start and is updated when the
2437            # job is started by the Cloud Dataflow service. For other jobs, start_time
2438            # always equals to create_time and is immutable and set by the Cloud Dataflow
2439            # service.
2440        "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2441            # Cloud Dataflow service.
2442        "requestedState": "A String", # The job's requested state.
2443            #
2444            # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2445            # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
2446            # also be used to directly set a job's requested state to
2447            # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2448            # job if it has not already reached a terminal state.
2449        "name": "A String", # The user-specified Cloud Dataflow job name.
2450            #
2451            # Only one Job with a given name may exist in a project at any
2452            # given time. If a caller attempts to create a Job with the same
2453            # name as an already-existing Job, the attempt returns the
2454            # existing Job.
2455            #
2456            # The name must match the regular expression
2457            # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2458        "steps": [ # Exactly one of step or steps_location should be specified.
2459            #
2460            # The top-level steps that constitute the entire job.
2461          { # Defines a particular step within a Cloud Dataflow job.
2462              #
2463              # A job consists of multiple steps, each of which performs some
2464              # specific operation as part of the overall job.  Data is typically
2465              # passed from one step to another as part of the job.
2466              #
2467              # Here's an example of a sequence of steps which together implement a
2468              # Map-Reduce job:
2469              #
2470              #   * Read a collection of data from some source, parsing the
2471              #     collection's elements.
2472              #
2473              #   * Validate the elements.
2474              #
2475              #   * Apply a user-defined function to map each element to some value
2476              #     and extract an element-specific key value.
2477              #
2478              #   * Group elements with the same key into a single element with
2479              #     that key, transforming a multiply-keyed collection into a
2480              #     uniquely-keyed collection.
2481              #
2482              #   * Write the elements out to some data sink.
2483              #
2484              # Note that the Cloud Dataflow service may be used to run many different
2485              # types of jobs, not just Map-Reduce.
2486            "kind": "A String", # The kind of step in the Cloud Dataflow job.
2487            "properties": { # Named properties associated with the step. Each kind of
2488                # predefined step has its own required set of properties.
2489                # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
2490              "a_key": "", # Properties of the object.
2491            },
2492            "name": "A String", # The name that identifies the step. This must be unique for each
2493                # step with respect to all other steps in the Cloud Dataflow job.
2494          },
2495        ],
2496        "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2497            # of the job it replaced.
2498            #
2499            # When sending a `CreateJobRequest`, you can update a job by specifying it
2500            # here. The job named here is stopped, and its intermediate state is
2501            # transferred to this job.
2502        "currentState": "A String", # The current state of the job.
2503            #
2504            # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2505            # specified.
2506            #
2507            # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2508            # terminal state. After a job has reached a terminal state, no
2509            # further state updates may be made.
2510            #
2511            # This field may be mutated by the Cloud Dataflow service;
2512            # callers cannot mutate it.
2513        "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
2514            # isn't contained in the submitted job.
2515          "stages": { # A mapping from each stage to the information about that stage.
2516            "a_key": { # Contains information about how a particular
2517                # google.dataflow.v1beta3.Step will be executed.
2518              "stepName": [ # The steps associated with the execution stage.
2519                  # Note that stages may have several steps, and that a given step
2520                  # might be run by more than one stage.
2521                "A String",
2522              ],
2523            },
2524          },
2525        },
2526      },
2527    ],
2528  }</pre>
2529</div>
2530
2531<div class="method">
2532    <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
2533  <pre>Retrieves the next page of results.
2534
2535Args:
2536  previous_request: The request for the previous page. (required)
2537  previous_response: The response from the request for the previous page. (required)
2538
2539Returns:
2540  A request object that you can call 'execute()' on to request the next
2541  page. Returns None if there are no more items in the collection.
2542    </pre>
2543</div>
2544
2545<div class="method">
2546    <code class="details" id="snapshot">snapshot(projectId, location, jobId, body, x__xgafv=None)</code>
2547  <pre>Snapshot the state of a streaming job.
2548
2549Args:
2550  projectId: string, The project which owns the job to be snapshotted. (required)
2551  location: string, The location that contains this job. (required)
2552  jobId: string, The job to be snapshotted. (required)
2553  body: object, The request body. (required)
2554    The object takes the form of:
2555
2556{ # Request to create a snapshot of a job.
2557    "location": "A String", # The location that contains this job.
2558    "ttl": "A String", # TTL for the snapshot.
2559  }
2560
2561  x__xgafv: string, V1 error format.
2562    Allowed values
2563      1 - v1 error format
2564      2 - v2 error format
2565
2566Returns:
2567  An object of the form:
2568
2569    { # Represents a snapshot of a job.
2570    "sourceJobId": "A String", # The job this snapshot was created from.
2571    "projectId": "A String", # The project this snapshot belongs to.
2572    "creationTime": "A String", # The time this snapshot was created.
2573    "state": "A String", # State of the snapshot.
2574    "ttl": "A String", # The time after which this snapshot will be automatically deleted.
2575    "id": "A String", # The unique ID of this snapshot.
2576  }</pre>
2577</div>
2578
2579<div class="method">
2580    <code class="details" id="update">update(projectId, location, jobId, body, x__xgafv=None)</code>
2581  <pre>Updates the state of an existing Cloud Dataflow job.
2582
2583To update the state of an existing job, we recommend using
2584`projects.locations.jobs.update` with a [regional endpoint]
2585(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints). Using
2586`projects.jobs.update` is not recommended, as you can only update the state
2587of jobs that are running in `us-central1`.
2588
2589Args:
2590  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
2591  location: string, The [regional endpoint]
2592(https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
2593contains this job. (required)
2594  jobId: string, The job ID. (required)
2595  body: object, The request body. (required)
2596    The object takes the form of:
2597
2598{ # Defines a job to be run by the Cloud Dataflow service.
2599  "labels": { # User-defined labels for this job.
2600      #
2601      # The labels map can contain no more than 64 entries.  Entries of the labels
2602      # map are UTF8 strings that comply with the following restrictions:
2603      #
2604      # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
2605      # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2606      # * Both keys and values are additionally constrained to be <= 128 bytes in
2607      # size.
2608    "a_key": "A String",
2609  },
2610  "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
2611      # by the metadata values provided here. Populated for ListJobs and all GetJob
2612      # views SUMMARY and higher.
2613      # ListJob response and Job SUMMARY view.
2614    "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
2615      "versionDisplayName": "A String", # A readable string describing the version of the SDK.
2616      "version": "A String", # The version of the SDK used to run the job.
2617      "sdkSupportStatus": "A String", # The support status for this SDK version.
2618    },
2619    "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
2620      { # Metadata for a PubSub connector used by the job.
2621        "topic": "A String", # Topic accessed in the connection.
2622        "subscription": "A String", # Subscription used in the connection.
2623      },
2624    ],
2625    "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
2626      { # Metadata for a Datastore connector used by the job.
2627        "projectId": "A String", # ProjectId accessed in the connection.
2628        "namespace": "A String", # Namespace used in the connection.
2629      },
2630    ],
2631    "fileDetails": [ # Identification of a File source used in the Dataflow job.
2632      { # Metadata for a File connector used by the job.
2633        "filePattern": "A String", # File Pattern used to access files by the connector.
2634      },
2635    ],
2636    "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
2637      { # Metadata for a Spanner connector used by the job.
2638        "instanceId": "A String", # InstanceId accessed in the connection.
2639        "projectId": "A String", # ProjectId accessed in the connection.
2640        "databaseId": "A String", # DatabaseId accessed in the connection.
2641      },
2642    ],
2643    "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
2644      { # Metadata for a BigTable connector used by the job.
2645        "instanceId": "A String", # InstanceId accessed in the connection.
2646        "projectId": "A String", # ProjectId accessed in the connection.
2647        "tableId": "A String", # TableId accessed in the connection.
2648      },
2649    ],
2650    "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
2651      { # Metadata for a BigQuery connector used by the job.
2652        "projectId": "A String", # Project accessed in the connection.
2653        "dataset": "A String", # Dataset accessed in the connection.
2654        "table": "A String", # Table accessed in the connection.
2655        "query": "A String", # Query used to access data in the connection.
2656      },
2657    ],
2658  },
2659  "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
2660      # A description of the user pipeline and stages through which it is executed.
2661      # Created by Cloud Dataflow service.  Only retrieved with
2662      # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2663      # form.  This data is provided by the Dataflow service for ease of visualizing
2664      # the pipeline and interpreting Dataflow provided metrics.
2665    "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2666      { # Description of the type, names/ids, and input/outputs for a transform.
2667        "kind": "A String", # Type of transform.
2668        "name": "A String", # User provided name for this transform instance.
2669        "inputCollectionName": [ # User names for all collection inputs to this transform.
2670          "A String",
2671        ],
2672        "displayData": [ # Transform-specific display data.
2673          { # Data provided with a pipeline or transform to provide descriptive info.
2674            "shortStrValue": "A String", # A possible additional shorter value to display.
2675                # For example a java_class_name_value of com.mypackage.MyDoFn
2676                # will be stored with MyDoFn as the short_str_value and
2677                # com.mypackage.MyDoFn as the java_class_name value.
2678                # short_str_value can be displayed and java_class_name_value
2679                # will be displayed as a tooltip.
2680            "durationValue": "A String", # Contains value if the data is of duration type.
2681            "url": "A String", # An optional full URL.
2682            "floatValue": 3.14, # Contains value if the data is of float type.
2683            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2684                # language namespace (i.e. python module) which defines the display data.
2685                # This allows a dax monitoring system to specially handle the data
2686                # and perform custom rendering.
2687            "javaClassValue": "A String", # Contains value if the data is of java class type.
2688            "label": "A String", # An optional label to display in a dax UI for the element.
2689            "boolValue": True or False, # Contains value if the data is of a boolean type.
2690            "strValue": "A String", # Contains value if the data is of string type.
2691            "key": "A String", # The key identifying the display data.
2692                # This is intended to be used as a label for the display data
2693                # when viewed in a dax monitoring system.
2694            "int64Value": "A String", # Contains value if the data is of int64 type.
2695            "timestampValue": "A String", # Contains value if the data is of timestamp type.
2696          },
2697        ],
2698        "outputCollectionName": [ # User  names for all collection outputs to this transform.
2699          "A String",
2700        ],
2701        "id": "A String", # SDK generated id of this transform instance.
2702      },
2703    ],
2704    "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2705      { # Description of the composing transforms, names/ids, and input/outputs of a
2706          # stage of execution.  Some composing transforms and sources may have been
2707          # generated by the Dataflow service during execution planning.
2708        "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2709          { # Description of an interstitial value between transforms in an execution
2710              # stage.
2711            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2712            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2713                # source is most closely associated.
2714            "name": "A String", # Dataflow service generated name for this source.
2715          },
2716        ],
2717        "kind": "A String", # Type of tranform this stage is executing.
2718        "name": "A String", # Dataflow service generated name for this stage.
2719        "outputSource": [ # Output sources for this stage.
2720          { # Description of an input or output of an execution stage.
2721            "userName": "A String", # Human-readable name for this source; may be user or system generated.
2722            "sizeBytes": "A String", # Size of the source, if measurable.
2723            "name": "A String", # Dataflow service generated name for this source.
2724            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2725                # source is most closely associated.
2726          },
2727        ],
2728        "inputSource": [ # Input sources for this stage.
2729          { # Description of an input or output of an execution stage.
2730            "userName": "A String", # Human-readable name for this source; may be user or system generated.
2731            "sizeBytes": "A String", # Size of the source, if measurable.
2732            "name": "A String", # Dataflow service generated name for this source.
2733            "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2734                # source is most closely associated.
2735          },
2736        ],
2737        "componentTransform": [ # Transforms that comprise this execution stage.
2738          { # Description of a transform executed as part of an execution stage.
2739            "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2740            "originalTransform": "A String", # User name for the original user transform with which this transform is
2741                # most closely associated.
2742            "name": "A String", # Dataflow service generated name for this source.
2743          },
2744        ],
2745        "id": "A String", # Dataflow service generated id for this stage.
2746      },
2747    ],
2748    "displayData": [ # Pipeline level display data.
2749      { # Data provided with a pipeline or transform to provide descriptive info.
2750        "shortStrValue": "A String", # A possible additional shorter value to display.
2751            # For example a java_class_name_value of com.mypackage.MyDoFn
2752            # will be stored with MyDoFn as the short_str_value and
2753            # com.mypackage.MyDoFn as the java_class_name value.
2754            # short_str_value can be displayed and java_class_name_value
2755            # will be displayed as a tooltip.
2756        "durationValue": "A String", # Contains value if the data is of duration type.
2757        "url": "A String", # An optional full URL.
2758        "floatValue": 3.14, # Contains value if the data is of float type.
2759        "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2760            # language namespace (i.e. python module) which defines the display data.
2761            # This allows a dax monitoring system to specially handle the data
2762            # and perform custom rendering.
2763        "javaClassValue": "A String", # Contains value if the data is of java class type.
2764        "label": "A String", # An optional label to display in a dax UI for the element.
2765        "boolValue": True or False, # Contains value if the data is of a boolean type.
2766        "strValue": "A String", # Contains value if the data is of string type.
2767        "key": "A String", # The key identifying the display data.
2768            # This is intended to be used as a label for the display data
2769            # when viewed in a dax monitoring system.
2770        "int64Value": "A String", # Contains value if the data is of int64 type.
2771        "timestampValue": "A String", # Contains value if the data is of timestamp type.
2772      },
2773    ],
2774  },
2775  "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
2776      # callers cannot mutate it.
2777    { # A message describing the state of a particular execution stage.
2778      "executionStageName": "A String", # The name of the execution stage.
2779      "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
2780      "currentStateTime": "A String", # The time at which the stage transitioned to this state.
2781    },
2782  ],
2783  "id": "A String", # The unique ID of this job.
2784      #
2785      # This field is set by the Cloud Dataflow service when the Job is
2786      # created, and is immutable for the life of the job.
2787  "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2788      # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2789  "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2790  "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2791      # corresponding name prefixes of the new job.
2792    "a_key": "A String",
2793  },
2794  "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2795    "version": { # A structure describing which components and their versions of the service
2796        # are required in order to run the job.
2797      "a_key": "", # Properties of the object.
2798    },
2799    "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
2800    "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
2801        # at rest, AKA a Customer Managed Encryption Key (CMEK).
2802        #
2803        # Format:
2804        #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
2805    "internalExperiments": { # Experimental settings.
2806      "a_key": "", # Properties of the object. Contains field @type with type URL.
2807    },
2808    "dataset": "A String", # The dataset for the current project where various workflow
2809        # related tables are stored.
2810        #
2811        # The supported resource type is:
2812        #
2813        # Google BigQuery:
2814        #   bigquery.googleapis.com/{dataset}
2815    "experiments": [ # The list of experiments to enable.
2816      "A String",
2817    ],
2818    "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
2819    "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2820        # options are passed through the service and are used to recreate the
2821        # SDK pipeline options on the worker in a language agnostic and platform
2822        # independent way.
2823      "a_key": "", # Properties of the object.
2824    },
2825    "userAgent": { # A description of the process that generated the request.
2826      "a_key": "", # Properties of the object.
2827    },
2828    "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
2829        # unspecified, the service will attempt to choose a reasonable
2830        # default.  This should be in the form of the API service name,
2831        # e.g. "compute.googleapis.com".
2832    "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2833        # specified in order for the job to have workers.
2834      { # Describes one particular pool of Cloud Dataflow workers to be
2835          # instantiated by the Cloud Dataflow service in order to perform the
2836          # computations required by a job.  Note that a workflow job may use
2837          # multiple pools, in order to match the various computational
2838          # requirements of the various stages of the job.
2839        "diskSourceImage": "A String", # Fully qualified source image for disks.
2840        "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2841            # using the standard Dataflow task runner.  Users should ignore
2842            # this field.
2843          "workflowFileName": "A String", # The file to store the workflow in.
2844          "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
2845              # will not be uploaded.
2846              #
2847              # The supported resource type is:
2848              #
2849              # Google Cloud Storage:
2850              #   storage.googleapis.com/{bucket}/{object}
2851              #   bucket.storage.googleapis.com/{object}
2852          "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2853          "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2854            "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2855            "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2856                # "shuffle/v1beta1".
2857            "workerId": "A String", # The ID of the worker running this pipeline.
2858            "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2859                #
2860                # When workers access Google Cloud APIs, they logically do so via
2861                # relative URLs.  If this field is specified, it supplies the base
2862                # URL to use for resolving these relative URLs.  The normative
2863                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2864                # Locators".
2865                #
2866                # If not specified, the default value is "http://www.googleapis.com/"
2867            "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2868                # "dataflow/v1b3/projects".
2869            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2870                # storage.
2871                #
2872                # The supported resource type is:
2873                #
2874                # Google Cloud Storage:
2875                #
2876                #   storage.googleapis.com/{bucket}/{object}
2877                #   bucket.storage.googleapis.com/{object}
2878          },
2879          "vmId": "A String", # The ID string of the VM.
2880          "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2881          "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2882          "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2883              # access the Cloud Dataflow API.
2884            "A String",
2885          ],
2886          "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2887              # taskrunner; e.g. "root".
2888          "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2889              #
2890              # When workers access Google Cloud APIs, they logically do so via
2891              # relative URLs.  If this field is specified, it supplies the base
2892              # URL to use for resolving these relative URLs.  The normative
2893              # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2894              # Locators".
2895              #
2896              # If not specified, the default value is "http://www.googleapis.com/"
2897          "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2898              # taskrunner; e.g. "wheel".
2899          "languageHint": "A String", # The suggested backend language.
2900          "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2901              # console.
2902          "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2903          "logDir": "A String", # The directory on the VM to store logs.
2904          "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
2905          "harnessCommand": "A String", # The command to launch the worker harness.
2906          "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2907              # temporary storage.
2908              #
2909              # The supported resource type is:
2910              #
2911              # Google Cloud Storage:
2912              #   storage.googleapis.com/{bucket}/{object}
2913              #   bucket.storage.googleapis.com/{object}
2914          "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2915        },
2916        "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2917            # are supported.
2918        "packages": [ # Packages to be installed on workers.
2919          { # The packages that must be installed in order for a worker to run the
2920              # steps of the Cloud Dataflow job that will be assigned to its worker
2921              # pool.
2922              #
2923              # This is the mechanism by which the Cloud Dataflow SDK causes code to
2924              # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2925              # might use this to install jars containing the user's code and all of the
2926              # various dependencies (libraries, data files, etc.) required in order
2927              # for that code to run.
2928            "location": "A String", # The resource to read the package from. The supported resource type is:
2929                #
2930                # Google Cloud Storage:
2931                #
2932                #   storage.googleapis.com/{bucket}
2933                #   bucket.storage.googleapis.com/
2934            "name": "A String", # The name of the package.
2935          },
2936        ],
2937        "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
2938            # service will attempt to choose a reasonable default.
2939        "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
2940            # the service will use the network "default".
2941        "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
2942            # will attempt to choose a reasonable default.
2943        "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
2944            # attempt to choose a reasonable default.
2945        "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
2946            # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2947            # `TEARDOWN_NEVER`.
2948            # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2949            # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2950            # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2951            # down.
2952            #
2953            # If the workers are not torn down by the service, they will
2954            # continue to run and use Google Compute Engine VM resources in the
2955            # user's project until they are explicitly terminated by the user.
2956            # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2957            # policy except for small, manually supervised test jobs.
2958            #
2959            # If unknown or unspecified, the service will attempt to choose a reasonable
2960            # default.
2961        "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2962            # Compute Engine API.
2963        "ipConfiguration": "A String", # Configuration for VM IPs.
2964        "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2965            # service will choose a number of threads (according to the number of cores
2966            # on the selected machine type for batch, or 1 by convention for streaming).
2967        "poolArgs": { # Extra arguments for this worker pool.
2968          "a_key": "", # Properties of the object. Contains field @type with type URL.
2969        },
2970        "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2971            # execute the job.  If zero or unspecified, the service will
2972            # attempt to choose a reasonable default.
2973        "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2974            # harness, residing in Google Container Registry.
2975        "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
2976            # the form "regions/REGION/subnetworks/SUBNETWORK".
2977        "dataDisks": [ # Data disks that are used by a VM in this workflow.
2978          { # Describes the data disk used by a workflow job.
2979            "mountPoint": "A String", # Directory in a VM where disk is mounted.
2980            "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
2981                # attempt to choose a reasonable default.
2982            "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
2983                # must be a disk type appropriate to the project and zone in which
2984                # the workers will run.  If unknown or unspecified, the service
2985                # will attempt to choose a reasonable default.
2986                #
2987                # For example, the standard persistent disk type is a resource name
2988                # typically ending in "pd-standard".  If SSD persistent disks are
2989                # available, the resource name typically ends with "pd-ssd".  The
2990                # actual valid values are defined the Google Compute Engine API,
2991                # not by the Cloud Dataflow API; consult the Google Compute Engine
2992                # documentation for more information about determining the set of
2993                # available disk types for a particular project and zone.
2994                #
2995                # Google Compute Engine Disk types are local to a particular
2996                # project in a particular zone, and so the resource name will
2997                # typically look something like this:
2998                #
2999                # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
3000          },
3001        ],
3002        "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
3003          "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
3004          "algorithm": "A String", # The algorithm to use for autoscaling.
3005        },
3006        "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
3007            # select a default set of packages which are useful to worker
3008            # harnesses written in a particular language.
3009        "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
3010            # attempt to choose a reasonable default.
3011        "metadata": { # Metadata to set on the Google Compute Engine VMs.
3012          "a_key": "A String",
3013        },
3014      },
3015    ],
3016    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
3017        # storage.  The system will append the suffix "/temp-{JOBNAME} to
3018        # this resource prefix, where {JOBNAME} is the value of the
3019        # job_name field.  The resulting bucket and object prefix is used
3020        # as the prefix of the resources used to store temporary data
3021        # needed during the job execution.  NOTE: This will override the
3022        # value in taskrunner_settings.
3023        # The supported resource type is:
3024        #
3025        # Google Cloud Storage:
3026        #
3027        #   storage.googleapis.com/{bucket}/{object}
3028        #   bucket.storage.googleapis.com/{object}
3029  },
3030  "location": "A String", # The [regional endpoint]
3031      # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
3032      # contains this job.
3033  "tempFiles": [ # A set of files the system should be aware of that are used
3034      # for temporary storage. These temporary files will be
3035      # removed on job completion.
3036      # No duplicates are allowed.
3037      # No file patterns are supported.
3038      #
3039      # The supported files are:
3040      #
3041      # Google Cloud Storage:
3042      #
3043      #    storage.googleapis.com/{bucket}/{object}
3044      #    bucket.storage.googleapis.com/{object}
3045    "A String",
3046  ],
3047  "type": "A String", # The type of Cloud Dataflow job.
3048  "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
3049      # If this field is set, the service will ensure its uniqueness.
3050      # The request to create a job will fail if the service has knowledge of a
3051      # previously submitted job with the same client's ID and job name.
3052      # The caller may use this field to ensure idempotence of job
3053      # creation across retried attempts to create a job.
3054      # By default, the field is empty and, in that case, the service ignores it.
3055  "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
3056      # snapshot.
3057  "stepsLocation": "A String", # The GCS location where the steps are stored.
3058  "currentStateTime": "A String", # The timestamp associated with the current state.
3059  "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
3060      # Flexible resource scheduling jobs are started with some delay after job
3061      # creation, so start_time is unset before start and is updated when the
3062      # job is started by the Cloud Dataflow service. For other jobs, start_time
3063      # always equals to create_time and is immutable and set by the Cloud Dataflow
3064      # service.
3065  "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
3066      # Cloud Dataflow service.
3067  "requestedState": "A String", # The job's requested state.
3068      #
3069      # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
3070      # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
3071      # also be used to directly set a job's requested state to
3072      # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
3073      # job if it has not already reached a terminal state.
3074  "name": "A String", # The user-specified Cloud Dataflow job name.
3075      #
3076      # Only one Job with a given name may exist in a project at any
3077      # given time. If a caller attempts to create a Job with the same
3078      # name as an already-existing Job, the attempt returns the
3079      # existing Job.
3080      #
3081      # The name must match the regular expression
3082      # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
3083  "steps": [ # Exactly one of step or steps_location should be specified.
3084      #
3085      # The top-level steps that constitute the entire job.
3086    { # Defines a particular step within a Cloud Dataflow job.
3087        #
3088        # A job consists of multiple steps, each of which performs some
3089        # specific operation as part of the overall job.  Data is typically
3090        # passed from one step to another as part of the job.
3091        #
3092        # Here's an example of a sequence of steps which together implement a
3093        # Map-Reduce job:
3094        #
3095        #   * Read a collection of data from some source, parsing the
3096        #     collection's elements.
3097        #
3098        #   * Validate the elements.
3099        #
3100        #   * Apply a user-defined function to map each element to some value
3101        #     and extract an element-specific key value.
3102        #
3103        #   * Group elements with the same key into a single element with
3104        #     that key, transforming a multiply-keyed collection into a
3105        #     uniquely-keyed collection.
3106        #
3107        #   * Write the elements out to some data sink.
3108        #
3109        # Note that the Cloud Dataflow service may be used to run many different
3110        # types of jobs, not just Map-Reduce.
3111      "kind": "A String", # The kind of step in the Cloud Dataflow job.
3112      "properties": { # Named properties associated with the step. Each kind of
3113          # predefined step has its own required set of properties.
3114          # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
3115        "a_key": "", # Properties of the object.
3116      },
3117      "name": "A String", # The name that identifies the step. This must be unique for each
3118          # step with respect to all other steps in the Cloud Dataflow job.
3119    },
3120  ],
3121  "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
3122      # of the job it replaced.
3123      #
3124      # When sending a `CreateJobRequest`, you can update a job by specifying it
3125      # here. The job named here is stopped, and its intermediate state is
3126      # transferred to this job.
3127  "currentState": "A String", # The current state of the job.
3128      #
3129      # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
3130      # specified.
3131      #
3132      # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
3133      # terminal state. After a job has reached a terminal state, no
3134      # further state updates may be made.
3135      #
3136      # This field may be mutated by the Cloud Dataflow service;
3137      # callers cannot mutate it.
3138  "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
3139      # isn't contained in the submitted job.
3140    "stages": { # A mapping from each stage to the information about that stage.
3141      "a_key": { # Contains information about how a particular
3142          # google.dataflow.v1beta3.Step will be executed.
3143        "stepName": [ # The steps associated with the execution stage.
3144            # Note that stages may have several steps, and that a given step
3145            # might be run by more than one stage.
3146          "A String",
3147        ],
3148      },
3149    },
3150  },
3151}
3152
3153  x__xgafv: string, V1 error format.
3154    Allowed values
3155      1 - v1 error format
3156      2 - v2 error format
3157
3158Returns:
3159  An object of the form:
3160
3161    { # Defines a job to be run by the Cloud Dataflow service.
3162    "labels": { # User-defined labels for this job.
3163        #
3164        # The labels map can contain no more than 64 entries.  Entries of the labels
3165        # map are UTF8 strings that comply with the following restrictions:
3166        #
3167        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
3168        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
3169        # * Both keys and values are additionally constrained to be <= 128 bytes in
3170        # size.
3171      "a_key": "A String",
3172    },
3173    "jobMetadata": { # Metadata available primarily for filtering jobs. Will be included in the # This field is populated by the Dataflow service to support filtering jobs
3174        # by the metadata values provided here. Populated for ListJobs and all GetJob
3175        # views SUMMARY and higher.
3176        # ListJob response and Job SUMMARY view.
3177      "sdkVersion": { # The version of the SDK used to run the job. # The SDK version used to run the job.
3178        "versionDisplayName": "A String", # A readable string describing the version of the SDK.
3179        "version": "A String", # The version of the SDK used to run the job.
3180        "sdkSupportStatus": "A String", # The support status for this SDK version.
3181      },
3182      "pubsubDetails": [ # Identification of a PubSub source used in the Dataflow job.
3183        { # Metadata for a PubSub connector used by the job.
3184          "topic": "A String", # Topic accessed in the connection.
3185          "subscription": "A String", # Subscription used in the connection.
3186        },
3187      ],
3188      "datastoreDetails": [ # Identification of a Datastore source used in the Dataflow job.
3189        { # Metadata for a Datastore connector used by the job.
3190          "projectId": "A String", # ProjectId accessed in the connection.
3191          "namespace": "A String", # Namespace used in the connection.
3192        },
3193      ],
3194      "fileDetails": [ # Identification of a File source used in the Dataflow job.
3195        { # Metadata for a File connector used by the job.
3196          "filePattern": "A String", # File Pattern used to access files by the connector.
3197        },
3198      ],
3199      "spannerDetails": [ # Identification of a Spanner source used in the Dataflow job.
3200        { # Metadata for a Spanner connector used by the job.
3201          "instanceId": "A String", # InstanceId accessed in the connection.
3202          "projectId": "A String", # ProjectId accessed in the connection.
3203          "databaseId": "A String", # DatabaseId accessed in the connection.
3204        },
3205      ],
3206      "bigTableDetails": [ # Identification of a BigTable source used in the Dataflow job.
3207        { # Metadata for a BigTable connector used by the job.
3208          "instanceId": "A String", # InstanceId accessed in the connection.
3209          "projectId": "A String", # ProjectId accessed in the connection.
3210          "tableId": "A String", # TableId accessed in the connection.
3211        },
3212      ],
3213      "bigqueryDetails": [ # Identification of a BigQuery source used in the Dataflow job.
3214        { # Metadata for a BigQuery connector used by the job.
3215          "projectId": "A String", # Project accessed in the connection.
3216          "dataset": "A String", # Dataset accessed in the connection.
3217          "table": "A String", # Table accessed in the connection.
3218          "query": "A String", # Query used to access data in the connection.
3219        },
3220      ],
3221    },
3222    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
3223        # A description of the user pipeline and stages through which it is executed.
3224        # Created by Cloud Dataflow service.  Only retrieved with
3225        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
3226        # form.  This data is provided by the Dataflow service for ease of visualizing
3227        # the pipeline and interpreting Dataflow provided metrics.
3228      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
3229        { # Description of the type, names/ids, and input/outputs for a transform.
3230          "kind": "A String", # Type of transform.
3231          "name": "A String", # User provided name for this transform instance.
3232          "inputCollectionName": [ # User names for all collection inputs to this transform.
3233            "A String",
3234          ],
3235          "displayData": [ # Transform-specific display data.
3236            { # Data provided with a pipeline or transform to provide descriptive info.
3237              "shortStrValue": "A String", # A possible additional shorter value to display.
3238                  # For example a java_class_name_value of com.mypackage.MyDoFn
3239                  # will be stored with MyDoFn as the short_str_value and
3240                  # com.mypackage.MyDoFn as the java_class_name value.
3241                  # short_str_value can be displayed and java_class_name_value
3242                  # will be displayed as a tooltip.
3243              "durationValue": "A String", # Contains value if the data is of duration type.
3244              "url": "A String", # An optional full URL.
3245              "floatValue": 3.14, # Contains value if the data is of float type.
3246              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3247                  # language namespace (i.e. python module) which defines the display data.
3248                  # This allows a dax monitoring system to specially handle the data
3249                  # and perform custom rendering.
3250              "javaClassValue": "A String", # Contains value if the data is of java class type.
3251              "label": "A String", # An optional label to display in a dax UI for the element.
3252              "boolValue": True or False, # Contains value if the data is of a boolean type.
3253              "strValue": "A String", # Contains value if the data is of string type.
3254              "key": "A String", # The key identifying the display data.
3255                  # This is intended to be used as a label for the display data
3256                  # when viewed in a dax monitoring system.
3257              "int64Value": "A String", # Contains value if the data is of int64 type.
3258              "timestampValue": "A String", # Contains value if the data is of timestamp type.
3259            },
3260          ],
3261          "outputCollectionName": [ # User  names for all collection outputs to this transform.
3262            "A String",
3263          ],
3264          "id": "A String", # SDK generated id of this transform instance.
3265        },
3266      ],
3267      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
3268        { # Description of the composing transforms, names/ids, and input/outputs of a
3269            # stage of execution.  Some composing transforms and sources may have been
3270            # generated by the Dataflow service during execution planning.
3271          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
3272            { # Description of an interstitial value between transforms in an execution
3273                # stage.
3274              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3275              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3276                  # source is most closely associated.
3277              "name": "A String", # Dataflow service generated name for this source.
3278            },
3279          ],
3280          "kind": "A String", # Type of tranform this stage is executing.
3281          "name": "A String", # Dataflow service generated name for this stage.
3282          "outputSource": [ # Output sources for this stage.
3283            { # Description of an input or output of an execution stage.
3284              "userName": "A String", # Human-readable name for this source; may be user or system generated.
3285              "sizeBytes": "A String", # Size of the source, if measurable.
3286              "name": "A String", # Dataflow service generated name for this source.
3287              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3288                  # source is most closely associated.
3289            },
3290          ],
3291          "inputSource": [ # Input sources for this stage.
3292            { # Description of an input or output of an execution stage.
3293              "userName": "A String", # Human-readable name for this source; may be user or system generated.
3294              "sizeBytes": "A String", # Size of the source, if measurable.
3295              "name": "A String", # Dataflow service generated name for this source.
3296              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3297                  # source is most closely associated.
3298            },
3299          ],
3300          "componentTransform": [ # Transforms that comprise this execution stage.
3301            { # Description of a transform executed as part of an execution stage.
3302              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3303              "originalTransform": "A String", # User name for the original user transform with which this transform is
3304                  # most closely associated.
3305              "name": "A String", # Dataflow service generated name for this source.
3306            },
3307          ],
3308          "id": "A String", # Dataflow service generated id for this stage.
3309        },
3310      ],
3311      "displayData": [ # Pipeline level display data.
3312        { # Data provided with a pipeline or transform to provide descriptive info.
3313          "shortStrValue": "A String", # A possible additional shorter value to display.
3314              # For example a java_class_name_value of com.mypackage.MyDoFn
3315              # will be stored with MyDoFn as the short_str_value and
3316              # com.mypackage.MyDoFn as the java_class_name value.
3317              # short_str_value can be displayed and java_class_name_value
3318              # will be displayed as a tooltip.
3319          "durationValue": "A String", # Contains value if the data is of duration type.
3320          "url": "A String", # An optional full URL.
3321          "floatValue": 3.14, # Contains value if the data is of float type.
3322          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3323              # language namespace (i.e. python module) which defines the display data.
3324              # This allows a dax monitoring system to specially handle the data
3325              # and perform custom rendering.
3326          "javaClassValue": "A String", # Contains value if the data is of java class type.
3327          "label": "A String", # An optional label to display in a dax UI for the element.
3328          "boolValue": True or False, # Contains value if the data is of a boolean type.
3329          "strValue": "A String", # Contains value if the data is of string type.
3330          "key": "A String", # The key identifying the display data.
3331              # This is intended to be used as a label for the display data
3332              # when viewed in a dax monitoring system.
3333          "int64Value": "A String", # Contains value if the data is of int64 type.
3334          "timestampValue": "A String", # Contains value if the data is of timestamp type.
3335        },
3336      ],
3337    },
3338    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
3339        # callers cannot mutate it.
3340      { # A message describing the state of a particular execution stage.
3341        "executionStageName": "A String", # The name of the execution stage.
3342        "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
3343        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
3344      },
3345    ],
3346    "id": "A String", # The unique ID of this job.
3347        #
3348        # This field is set by the Cloud Dataflow service when the Job is
3349        # created, and is immutable for the life of the job.
3350    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
3351        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
3352    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
3353    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
3354        # corresponding name prefixes of the new job.
3355      "a_key": "A String",
3356    },
3357    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
3358      "version": { # A structure describing which components and their versions of the service
3359          # are required in order to run the job.
3360        "a_key": "", # Properties of the object.
3361      },
3362      "flexResourceSchedulingGoal": "A String", # Which Flexible Resource Scheduling mode to run in.
3363      "serviceKmsKeyName": "A String", # If set, contains the Cloud KMS key identifier used to encrypt data
3364          # at rest, AKA a Customer Managed Encryption Key (CMEK).
3365          #
3366          # Format:
3367          #   projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY
3368      "internalExperiments": { # Experimental settings.
3369        "a_key": "", # Properties of the object. Contains field @type with type URL.
3370      },
3371      "dataset": "A String", # The dataset for the current project where various workflow
3372          # related tables are stored.
3373          #
3374          # The supported resource type is:
3375          #
3376          # Google BigQuery:
3377          #   bigquery.googleapis.com/{dataset}
3378      "experiments": [ # The list of experiments to enable.
3379        "A String",
3380      ],
3381      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
3382      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
3383          # options are passed through the service and are used to recreate the
3384          # SDK pipeline options on the worker in a language agnostic and platform
3385          # independent way.
3386        "a_key": "", # Properties of the object.
3387      },
3388      "userAgent": { # A description of the process that generated the request.
3389        "a_key": "", # Properties of the object.
3390      },
3391      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
3392          # unspecified, the service will attempt to choose a reasonable
3393          # default.  This should be in the form of the API service name,
3394          # e.g. "compute.googleapis.com".
3395      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
3396          # specified in order for the job to have workers.
3397        { # Describes one particular pool of Cloud Dataflow workers to be
3398            # instantiated by the Cloud Dataflow service in order to perform the
3399            # computations required by a job.  Note that a workflow job may use
3400            # multiple pools, in order to match the various computational
3401            # requirements of the various stages of the job.
3402          "diskSourceImage": "A String", # Fully qualified source image for disks.
3403          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
3404              # using the standard Dataflow task runner.  Users should ignore
3405              # this field.
3406            "workflowFileName": "A String", # The file to store the workflow in.
3407            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
3408                # will not be uploaded.
3409                #
3410                # The supported resource type is:
3411                #
3412                # Google Cloud Storage:
3413                #   storage.googleapis.com/{bucket}/{object}
3414                #   bucket.storage.googleapis.com/{object}
3415            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
3416            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
3417              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
3418              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
3419                  # "shuffle/v1beta1".
3420              "workerId": "A String", # The ID of the worker running this pipeline.
3421              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
3422                  #
3423                  # When workers access Google Cloud APIs, they logically do so via
3424                  # relative URLs.  If this field is specified, it supplies the base
3425                  # URL to use for resolving these relative URLs.  The normative
3426                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
3427                  # Locators".
3428                  #
3429                  # If not specified, the default value is "http://www.googleapis.com/"
3430              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
3431                  # "dataflow/v1b3/projects".
3432              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
3433                  # storage.
3434                  #
3435                  # The supported resource type is:
3436                  #
3437                  # Google Cloud Storage:
3438                  #
3439                  #   storage.googleapis.com/{bucket}/{object}
3440                  #   bucket.storage.googleapis.com/{object}
3441            },
3442            "vmId": "A String", # The ID string of the VM.
3443            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
3444            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
3445            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
3446                # access the Cloud Dataflow API.
3447              "A String",
3448            ],
3449            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
3450                # taskrunner; e.g. "root".
3451            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
3452                #
3453                # When workers access Google Cloud APIs, they logically do so via
3454                # relative URLs.  If this field is specified, it supplies the base
3455                # URL to use for resolving these relative URLs.  The normative
3456                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
3457                # Locators".
3458                #
3459                # If not specified, the default value is "http://www.googleapis.com/"
3460            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
3461                # taskrunner; e.g. "wheel".
3462            "languageHint": "A String", # The suggested backend language.
3463            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
3464                # console.
3465            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
3466            "logDir": "A String", # The directory on the VM to store logs.
3467            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
3468            "harnessCommand": "A String", # The command to launch the worker harness.
3469            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
3470                # temporary storage.
3471                #
3472                # The supported resource type is:
3473                #
3474                # Google Cloud Storage:
3475                #   storage.googleapis.com/{bucket}/{object}
3476                #   bucket.storage.googleapis.com/{object}
3477            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
3478          },
3479          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
3480              # are supported.
3481          "packages": [ # Packages to be installed on workers.
3482            { # The packages that must be installed in order for a worker to run the
3483                # steps of the Cloud Dataflow job that will be assigned to its worker
3484                # pool.
3485                #
3486                # This is the mechanism by which the Cloud Dataflow SDK causes code to
3487                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
3488                # might use this to install jars containing the user's code and all of the
3489                # various dependencies (libraries, data files, etc.) required in order
3490                # for that code to run.
3491              "location": "A String", # The resource to read the package from. The supported resource type is:
3492                  #
3493                  # Google Cloud Storage:
3494                  #
3495                  #   storage.googleapis.com/{bucket}
3496                  #   bucket.storage.googleapis.com/
3497              "name": "A String", # The name of the package.
3498            },
3499          ],
3500          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
3501              # service will attempt to choose a reasonable default.
3502          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
3503              # the service will use the network "default".
3504          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
3505              # will attempt to choose a reasonable default.
3506          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
3507              # attempt to choose a reasonable default.
3508          "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
3509              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
3510              # `TEARDOWN_NEVER`.
3511              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
3512              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
3513              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
3514              # down.
3515              #
3516              # If the workers are not torn down by the service, they will
3517              # continue to run and use Google Compute Engine VM resources in the
3518              # user's project until they are explicitly terminated by the user.
3519              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
3520              # policy except for small, manually supervised test jobs.
3521              #
3522              # If unknown or unspecified, the service will attempt to choose a reasonable
3523              # default.
3524          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
3525              # Compute Engine API.
3526          "ipConfiguration": "A String", # Configuration for VM IPs.
3527          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
3528              # service will choose a number of threads (according to the number of cores
3529              # on the selected machine type for batch, or 1 by convention for streaming).
3530          "poolArgs": { # Extra arguments for this worker pool.
3531            "a_key": "", # Properties of the object. Contains field @type with type URL.
3532          },
3533          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
3534              # execute the job.  If zero or unspecified, the service will
3535              # attempt to choose a reasonable default.
3536          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
3537              # harness, residing in Google Container Registry.
3538          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
3539              # the form "regions/REGION/subnetworks/SUBNETWORK".
3540          "dataDisks": [ # Data disks that are used by a VM in this workflow.
3541            { # Describes the data disk used by a workflow job.
3542              "mountPoint": "A String", # Directory in a VM where disk is mounted.
3543              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
3544                  # attempt to choose a reasonable default.
3545              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
3546                  # must be a disk type appropriate to the project and zone in which
3547                  # the workers will run.  If unknown or unspecified, the service
3548                  # will attempt to choose a reasonable default.
3549                  #
3550                  # For example, the standard persistent disk type is a resource name
3551                  # typically ending in "pd-standard".  If SSD persistent disks are
3552                  # available, the resource name typically ends with "pd-ssd".  The
3553                  # actual valid values are defined the Google Compute Engine API,
3554                  # not by the Cloud Dataflow API; consult the Google Compute Engine
3555                  # documentation for more information about determining the set of
3556                  # available disk types for a particular project and zone.
3557                  #
3558                  # Google Compute Engine Disk types are local to a particular
3559                  # project in a particular zone, and so the resource name will
3560                  # typically look something like this:
3561                  #
3562                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
3563            },
3564          ],
3565          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
3566            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
3567            "algorithm": "A String", # The algorithm to use for autoscaling.
3568          },
3569          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
3570              # select a default set of packages which are useful to worker
3571              # harnesses written in a particular language.
3572          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
3573              # attempt to choose a reasonable default.
3574          "metadata": { # Metadata to set on the Google Compute Engine VMs.
3575            "a_key": "A String",
3576          },
3577        },
3578      ],
3579      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
3580          # storage.  The system will append the suffix "/temp-{JOBNAME} to
3581          # this resource prefix, where {JOBNAME} is the value of the
3582          # job_name field.  The resulting bucket and object prefix is used
3583          # as the prefix of the resources used to store temporary data
3584          # needed during the job execution.  NOTE: This will override the
3585          # value in taskrunner_settings.
3586          # The supported resource type is:
3587          #
3588          # Google Cloud Storage:
3589          #
3590          #   storage.googleapis.com/{bucket}/{object}
3591          #   bucket.storage.googleapis.com/{object}
3592    },
3593    "location": "A String", # The [regional endpoint]
3594        # (https://cloud.google.com/dataflow/docs/concepts/regional-endpoints) that
3595        # contains this job.
3596    "tempFiles": [ # A set of files the system should be aware of that are used
3597        # for temporary storage. These temporary files will be
3598        # removed on job completion.
3599        # No duplicates are allowed.
3600        # No file patterns are supported.
3601        #
3602        # The supported files are:
3603        #
3604        # Google Cloud Storage:
3605        #
3606        #    storage.googleapis.com/{bucket}/{object}
3607        #    bucket.storage.googleapis.com/{object}
3608      "A String",
3609    ],
3610    "type": "A String", # The type of Cloud Dataflow job.
3611    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
3612        # If this field is set, the service will ensure its uniqueness.
3613        # The request to create a job will fail if the service has knowledge of a
3614        # previously submitted job with the same client's ID and job name.
3615        # The caller may use this field to ensure idempotence of job
3616        # creation across retried attempts to create a job.
3617        # By default, the field is empty and, in that case, the service ignores it.
3618    "createdFromSnapshotId": "A String", # If this is specified, the job's initial state is populated from the given
3619        # snapshot.
3620    "stepsLocation": "A String", # The GCS location where the steps are stored.
3621    "currentStateTime": "A String", # The timestamp associated with the current state.
3622    "startTime": "A String", # The timestamp when the job was started (transitioned to JOB_STATE_PENDING).
3623        # Flexible resource scheduling jobs are started with some delay after job
3624        # creation, so start_time is unset before start and is updated when the
3625        # job is started by the Cloud Dataflow service. For other jobs, start_time
3626        # always equals to create_time and is immutable and set by the Cloud Dataflow
3627        # service.
3628    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
3629        # Cloud Dataflow service.
3630    "requestedState": "A String", # The job's requested state.
3631        #
3632        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
3633        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
3634        # also be used to directly set a job's requested state to
3635        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
3636        # job if it has not already reached a terminal state.
3637    "name": "A String", # The user-specified Cloud Dataflow job name.
3638        #
3639        # Only one Job with a given name may exist in a project at any
3640        # given time. If a caller attempts to create a Job with the same
3641        # name as an already-existing Job, the attempt returns the
3642        # existing Job.
3643        #
3644        # The name must match the regular expression
3645        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
3646    "steps": [ # Exactly one of step or steps_location should be specified.
3647        #
3648        # The top-level steps that constitute the entire job.
3649      { # Defines a particular step within a Cloud Dataflow job.
3650          #
3651          # A job consists of multiple steps, each of which performs some
3652          # specific operation as part of the overall job.  Data is typically
3653          # passed from one step to another as part of the job.
3654          #
3655          # Here's an example of a sequence of steps which together implement a
3656          # Map-Reduce job:
3657          #
3658          #   * Read a collection of data from some source, parsing the
3659          #     collection's elements.
3660          #
3661          #   * Validate the elements.
3662          #
3663          #   * Apply a user-defined function to map each element to some value
3664          #     and extract an element-specific key value.
3665          #
3666          #   * Group elements with the same key into a single element with
3667          #     that key, transforming a multiply-keyed collection into a
3668          #     uniquely-keyed collection.
3669          #
3670          #   * Write the elements out to some data sink.
3671          #
3672          # Note that the Cloud Dataflow service may be used to run many different
3673          # types of jobs, not just Map-Reduce.
3674        "kind": "A String", # The kind of step in the Cloud Dataflow job.
3675        "properties": { # Named properties associated with the step. Each kind of
3676            # predefined step has its own required set of properties.
3677            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
3678          "a_key": "", # Properties of the object.
3679        },
3680        "name": "A String", # The name that identifies the step. This must be unique for each
3681            # step with respect to all other steps in the Cloud Dataflow job.
3682      },
3683    ],
3684    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
3685        # of the job it replaced.
3686        #
3687        # When sending a `CreateJobRequest`, you can update a job by specifying it
3688        # here. The job named here is stopped, and its intermediate state is
3689        # transferred to this job.
3690    "currentState": "A String", # The current state of the job.
3691        #
3692        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
3693        # specified.
3694        #
3695        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
3696        # terminal state. After a job has reached a terminal state, no
3697        # further state updates may be made.
3698        #
3699        # This field may be mutated by the Cloud Dataflow service;
3700        # callers cannot mutate it.
3701    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
3702        # isn't contained in the submitted job.
3703      "stages": { # A mapping from each stage to the information about that stage.
3704        "a_key": { # Contains information about how a particular
3705            # google.dataflow.v1beta3.Step will be executed.
3706          "stepName": [ # The steps associated with the execution stage.
3707              # Note that stages may have several steps, and that a given step
3708              # might be run by more than one stage.
3709            "A String",
3710          ],
3711        },
3712      },
3713    },
3714  }</pre>
3715</div>
3716
3717</body></html>