1<html><body>
2<style>
3
4body, h1, h2, h3, div, span, p, pre, a { /* Reset: zero out spacing and borders; font weight, style and family inherit. */
5  margin: 0;
6  padding: 0;
7  border: 0;
8  font-weight: inherit;
9  font-style: inherit;
10  font-size: 100%;
11  font-family: inherit;
12  vertical-align: baseline;
13}
14
15body { /* Base text size and page padding. */
16  font-size: 13px;
17  padding: 1em;
18}
19
20h1 { /* Heading sizes step down from h1 to h3. */
21  font-size: 26px;
22  margin-bottom: 1em;
23}
24
25h2 {
26  font-size: 24px;
27  margin-bottom: 1em;
28}
29
30h3 {
31  font-size: 20px;
32  margin-bottom: 1em;
33  margin-top: 1em;
34}
35
36pre, code { /* Monospace stack shared by signatures and method details. */
37  line-height: 1.5;
38  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39}
40
41pre {
42  margin-top: 0.5em;
43}
44
45h1, h2, h3, p {
46  font-family: Arial, sans-serif; /* generic fallback must be the single hyphenated keyword `sans-serif` */
47}
48
49h1, h2, h3 {
50  border-bottom: solid #CCC 1px;
51}
52
53.toc_element { /* One entry in the method table of contents. */
54  margin-top: 0.5em;
55}
56
57.firstline { /* Summary line shown under each table-of-contents entry. */
58  margin-left: 2em; /* no whitespace between number and unit, otherwise the declaration is dropped */
59}
60
61.method  { /* Boxed panel wrapping one method's details. */
62  margin-top: 1em;
63  border: solid 1px #CCC;
64  padding: 1em;
65  background: #EEE;
66}
67
68.details { /* Method signature at the top of a .method panel. */
69  font-weight: bold;
70  font-size: 14px;
71}
72
73</style>
74
75<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.jobs.html">jobs</a></h1>
76<h2>Instance Methods</h2>
77<p class="toc_element">
78  <code><a href="dataflow_v1b3.projects.locations.jobs.debug.html">debug()</a></code>
79</p>
80<p class="firstline">Returns the debug Resource.</p>
81
82<p class="toc_element">
83  <code><a href="dataflow_v1b3.projects.locations.jobs.messages.html">messages()</a></code>
84</p>
85<p class="firstline">Returns the messages Resource.</p>
86
87<p class="toc_element">
88  <code><a href="dataflow_v1b3.projects.locations.jobs.workItems.html">workItems()</a></code>
89</p>
90<p class="firstline">Returns the workItems Resource.</p>
91
92<p class="toc_element">
93  <code><a href="#create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
94<p class="firstline">Creates a Cloud Dataflow job.</p>
95<p class="toc_element">
96  <code><a href="#get">get(projectId, location, jobId, x__xgafv=None, view=None)</a></code></p>
97<p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
98<p class="toc_element">
99  <code><a href="#getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</a></code></p>
100<p class="firstline">Request the job status.</p>
101<p class="toc_element">
102  <code><a href="#list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</a></code></p>
103<p class="firstline">List the jobs of a project.</p>
104<p class="toc_element">
105  <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
106<p class="firstline">Retrieves the next page of results.</p>
107<p class="toc_element">
108  <code><a href="#update">update(projectId, location, jobId, body, x__xgafv=None)</a></code></p>
109<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
110<h3>Method Details</h3>
111<div class="method">
112    <code class="details" id="create">create(projectId, location, body, x__xgafv=None, replaceJobId=None, view=None)</code>
113  <pre>Creates a Cloud Dataflow job.
114
115Args:
116  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
117  location: string, The location that contains this job. (required)
118  body: object, The request body. (required)
119    The object takes the form of:
120
121{ # Defines a job to be run by the Cloud Dataflow service.
122    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
123        # If this field is set, the service will ensure its uniqueness.
124        # The request to create a job will fail if the service has knowledge of a
125        # previously submitted job with the same client's ID and job name.
126        # The caller may use this field to ensure idempotence of job
127        # creation across retried attempts to create a job.
128        # By default, the field is empty and, in that case, the service ignores it.
129    "requestedState": "A String", # The job's requested state.
130        #
131        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
132        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
133        # also be used to directly set a job's requested state to
134        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
135        # job if it has not already reached a terminal state.
136    "name": "A String", # The user-specified Cloud Dataflow job name.
137        #
138        # Only one Job with a given name may exist in a project at any
139        # given time. If a caller attempts to create a Job with the same
140        # name as an already-existing Job, the attempt returns the
141        # existing Job.
142        #
143        # The name must match the regular expression
144        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
145    "location": "A String", # The location that contains this job.
146    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
147        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
148    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
149    "currentState": "A String", # The current state of the job.
150        #
151        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
152        # specified.
153        #
154        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
155        # terminal state. After a job has reached a terminal state, no
156        # further state updates may be made.
157        #
158        # This field may be mutated by the Cloud Dataflow service;
159        # callers cannot mutate it.
160    "labels": { # User-defined labels for this job.
161        #
162        # The labels map can contain no more than 64 entries.  Entries of the labels
163        # map are UTF8 strings that comply with the following restrictions:
164        #
165        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
166        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
167        # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
168        # size.
169      "a_key": "A String",
170    },
171    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
172        # corresponding name prefixes of the new job.
173      "a_key": "A String",
174    },
175    "id": "A String", # The unique ID of this job.
176        #
177        # This field is set by the Cloud Dataflow service when the Job is
178        # created, and is immutable for the life of the job.
179    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
180      "version": { # A structure describing which components and their versions of the service
181          # are required in order to run the job.
182        "a_key": "", # Properties of the object.
183      },
184      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
185          # storage.  The system will append the suffix "/temp-{JOBNAME}" to
186          # this resource prefix, where {JOBNAME} is the value of the
187          # job_name field.  The resulting bucket and object prefix is used
188          # as the prefix of the resources used to store temporary data
189          # needed during the job execution.  NOTE: This will override the
190          # value in taskrunner_settings.
191          # The supported resource type is:
192          #
193          # Google Cloud Storage:
194          #
195          #   storage.googleapis.com/{bucket}/{object}
196          #   bucket.storage.googleapis.com/{object}
197      "internalExperiments": { # Experimental settings.
198        "a_key": "", # Properties of the object. Contains field @type with type URL.
199      },
200      "dataset": "A String", # The dataset for the current project where various workflow
201          # related tables are stored.
202          #
203          # The supported resource type is:
204          #
205          # Google BigQuery:
206          #   bigquery.googleapis.com/{dataset}
207      "experiments": [ # The list of experiments to enable.
208        "A String",
209      ],
210      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
211      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
212          # options are passed through the service and are used to recreate the
213          # SDK pipeline options on the worker in a language agnostic and platform
214          # independent way.
215        "a_key": "", # Properties of the object.
216      },
217      "userAgent": { # A description of the process that generated the request.
218        "a_key": "", # Properties of the object.
219      },
220      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
221          # unspecified, the service will attempt to choose a reasonable
222          # default.  This should be in the form of the API service name,
223          # e.g. "compute.googleapis.com".
224      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
225          # specified in order for the job to have workers.
226        { # Describes one particular pool of Cloud Dataflow workers to be
227            # instantiated by the Cloud Dataflow service in order to perform the
228            # computations required by a job.  Note that a workflow job may use
229            # multiple pools, in order to match the various computational
230            # requirements of the various stages of the job.
231          "diskSourceImage": "A String", # Fully qualified source image for disks.
232          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
233              # using the standard Dataflow task runner.  Users should ignore
234              # this field.
235            "workflowFileName": "A String", # The file to store the workflow in.
236            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
237                # will not be uploaded.
238                #
239                # The supported resource type is:
240                #
241                # Google Cloud Storage:
242                #   storage.googleapis.com/{bucket}/{object}
243                #   bucket.storage.googleapis.com/{object}
244            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
245            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
246              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
247              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
248                  # "shuffle/v1beta1".
249              "workerId": "A String", # The ID of the worker running this pipeline.
250              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
251                  #
252                  # When workers access Google Cloud APIs, they logically do so via
253                  # relative URLs.  If this field is specified, it supplies the base
254                  # URL to use for resolving these relative URLs.  The normative
255                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
256                  # Locators".
257                  #
258                  # If not specified, the default value is "http://www.googleapis.com/"
259              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
260                  # "dataflow/v1b3/projects".
261              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
262                  # storage.
263                  #
264                  # The supported resource type is:
265                  #
266                  # Google Cloud Storage:
267                  #
268                  #   storage.googleapis.com/{bucket}/{object}
269                  #   bucket.storage.googleapis.com/{object}
270            },
271            "vmId": "A String", # The ID string of the VM.
272            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
273            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
274            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
275                # access the Cloud Dataflow API.
276              "A String",
277            ],
278            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
279                # taskrunner; e.g. "root".
280            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
281                #
282                # When workers access Google Cloud APIs, they logically do so via
283                # relative URLs.  If this field is specified, it supplies the base
284                # URL to use for resolving these relative URLs.  The normative
285                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
286                # Locators".
287                #
288                # If not specified, the default value is "http://www.googleapis.com/"
289            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
290                # taskrunner; e.g. "wheel".
291            "languageHint": "A String", # The suggested backend language.
292            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
293                # console.
294            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
295            "logDir": "A String", # The directory on the VM to store logs.
296            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
297            "harnessCommand": "A String", # The command to launch the worker harness.
298            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
299                # temporary storage.
300                #
301                # The supported resource type is:
302                #
303                # Google Cloud Storage:
304                #   storage.googleapis.com/{bucket}/{object}
305                #   bucket.storage.googleapis.com/{object}
306            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
307          },
308          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
309              # are supported.
310          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
311              # service will attempt to choose a reasonable default.
312          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
313              # the service will use the network "default".
314          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
315              # will attempt to choose a reasonable default.
316          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
317              # attempt to choose a reasonable default.
318          "dataDisks": [ # Data disks that are used by a VM in this workflow.
319            { # Describes the data disk used by a workflow job.
320              "mountPoint": "A String", # Directory in a VM where disk is mounted.
321              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
322                  # attempt to choose a reasonable default.
323              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
324                  # must be a disk type appropriate to the project and zone in which
325                  # the workers will run.  If unknown or unspecified, the service
326                  # will attempt to choose a reasonable default.
327                  #
328                  # For example, the standard persistent disk type is a resource name
329                  # typically ending in "pd-standard".  If SSD persistent disks are
330                  # available, the resource name typically ends with "pd-ssd".  The
331                  # actual valid values are defined by the Google Compute Engine API,
332                  # not by the Cloud Dataflow API; consult the Google Compute Engine
333                  # documentation for more information about determining the set of
334                  # available disk types for a particular project and zone.
335                  #
336                  # Google Compute Engine Disk types are local to a particular
337                  # project in a particular zone, and so the resource name will
338                  # typically look something like this:
339                  #
340                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
341            },
342          ],
343          "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
344              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
345              # `TEARDOWN_NEVER`.
346              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
347              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
348              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
349              # down.
350              #
351              # If the workers are not torn down by the service, they will
352              # continue to run and use Google Compute Engine VM resources in the
353              # user's project until they are explicitly terminated by the user.
354              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
355              # policy except for small, manually supervised test jobs.
356              #
357              # If unknown or unspecified, the service will attempt to choose a reasonable
358              # default.
359          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
360              # Compute Engine API.
361          "ipConfiguration": "A String", # Configuration for VM IPs.
362          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
363              # service will choose a number of threads (according to the number of cores
364              # on the selected machine type for batch, or 1 by convention for streaming).
365          "poolArgs": { # Extra arguments for this worker pool.
366            "a_key": "", # Properties of the object. Contains field @type with type URL.
367          },
368          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
369              # execute the job.  If zero or unspecified, the service will
370              # attempt to choose a reasonable default.
371          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
372              # harness, residing in Google Container Registry.
373          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
374              # the form "regions/REGION/subnetworks/SUBNETWORK".
375          "packages": [ # Packages to be installed on workers.
376            { # The packages that must be installed in order for a worker to run the
377                # steps of the Cloud Dataflow job that will be assigned to its worker
378                # pool.
379                #
380                # This is the mechanism by which the Cloud Dataflow SDK causes code to
381                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
382                # might use this to install jars containing the user's code and all of the
383                # various dependencies (libraries, data files, etc.) required in order
384                # for that code to run.
385              "location": "A String", # The resource to read the package from. The supported resource type is:
386                  #
387                  # Google Cloud Storage:
388                  #
389                  #   storage.googleapis.com/{bucket}
390                  #   bucket.storage.googleapis.com/
391              "name": "A String", # The name of the package.
392            },
393          ],
394          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
395            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
396            "algorithm": "A String", # The algorithm to use for autoscaling.
397          },
398          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
399              # select a default set of packages which are useful to worker
400              # harnesses written in a particular language.
401          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
402              # attempt to choose a reasonable default.
403          "metadata": { # Metadata to set on the Google Compute Engine VMs.
404            "a_key": "A String",
405          },
406        },
407      ],
408    },
409    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
410        # A description of the user pipeline and stages through which it is executed.
411        # Created by Cloud Dataflow service.  Only retrieved with
412        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
413        # form.  This data is provided by the Dataflow service for ease of visualizing
414        # the pipeline and interpreting Dataflow provided metrics.
415      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
416        { # Description of the type, names/ids, and input/outputs for a transform.
417          "kind": "A String", # Type of transform.
418          "name": "A String", # User provided name for this transform instance.
419          "inputCollectionName": [ # User names for all collection inputs to this transform.
420            "A String",
421          ],
422          "displayData": [ # Transform-specific display data.
423            { # Data provided with a pipeline or transform to provide descriptive info.
424              "shortStrValue": "A String", # A possible additional shorter value to display.
425                  # For example a java_class_name_value of com.mypackage.MyDoFn
426                  # will be stored with MyDoFn as the short_str_value and
427                  # com.mypackage.MyDoFn as the java_class_name value.
428                  # short_str_value can be displayed and java_class_name_value
429                  # will be displayed as a tooltip.
430              "durationValue": "A String", # Contains value if the data is of duration type.
431              "url": "A String", # An optional full URL.
432              "floatValue": 3.14, # Contains value if the data is of float type.
433              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
434                  # language namespace (i.e. python module) which defines the display data.
435                  # This allows a dax monitoring system to specially handle the data
436                  # and perform custom rendering.
437              "javaClassValue": "A String", # Contains value if the data is of java class type.
438              "label": "A String", # An optional label to display in a dax UI for the element.
439              "boolValue": True or False, # Contains value if the data is of a boolean type.
440              "strValue": "A String", # Contains value if the data is of string type.
441              "key": "A String", # The key identifying the display data.
442                  # This is intended to be used as a label for the display data
443                  # when viewed in a dax monitoring system.
444              "int64Value": "A String", # Contains value if the data is of int64 type.
445              "timestampValue": "A String", # Contains value if the data is of timestamp type.
446            },
447          ],
448          "outputCollectionName": [ # User names for all collection outputs to this transform.
449            "A String",
450          ],
451          "id": "A String", # SDK generated id of this transform instance.
452        },
453      ],
454      "displayData": [ # Pipeline level display data.
455        { # Data provided with a pipeline or transform to provide descriptive info.
456          "shortStrValue": "A String", # A possible additional shorter value to display.
457              # For example a java_class_name_value of com.mypackage.MyDoFn
458              # will be stored with MyDoFn as the short_str_value and
459              # com.mypackage.MyDoFn as the java_class_name value.
460              # short_str_value can be displayed and java_class_name_value
461              # will be displayed as a tooltip.
462          "durationValue": "A String", # Contains value if the data is of duration type.
463          "url": "A String", # An optional full URL.
464          "floatValue": 3.14, # Contains value if the data is of float type.
465          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
466              # language namespace (i.e. python module) which defines the display data.
467              # This allows a dax monitoring system to specially handle the data
468              # and perform custom rendering.
469          "javaClassValue": "A String", # Contains value if the data is of java class type.
470          "label": "A String", # An optional label to display in a dax UI for the element.
471          "boolValue": True or False, # Contains value if the data is of a boolean type.
472          "strValue": "A String", # Contains value if the data is of string type.
473          "key": "A String", # The key identifying the display data.
474              # This is intended to be used as a label for the display data
475              # when viewed in a dax monitoring system.
476          "int64Value": "A String", # Contains value if the data is of int64 type.
477          "timestampValue": "A String", # Contains value if the data is of timestamp type.
478        },
479      ],
480      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
481        { # Description of the composing transforms, names/ids, and input/outputs of a
482            # stage of execution.  Some composing transforms and sources may have been
483            # generated by the Dataflow service during execution planning.
484          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
485            { # Description of an interstitial value between transforms in an execution
486                # stage.
487              "userName": "A String", # Human-readable name for this source; may be user or system generated.
488              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
489                  # source is most closely associated.
490              "name": "A String", # Dataflow service generated name for this source.
491            },
492          ],
493          "kind": "A String", # Type of transform this stage is executing.
494          "name": "A String", # Dataflow service generated name for this stage.
495          "outputSource": [ # Output sources for this stage.
496            { # Description of an input or output of an execution stage.
497              "userName": "A String", # Human-readable name for this source; may be user or system generated.
498              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
499                  # source is most closely associated.
500              "name": "A String", # Dataflow service generated name for this source.
501              "sizeBytes": "A String", # Size of the source, if measurable.
502            },
503          ],
504          "inputSource": [ # Input sources for this stage.
505            { # Description of an input or output of an execution stage.
506              "userName": "A String", # Human-readable name for this source; may be user or system generated.
507              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
508                  # source is most closely associated.
509              "name": "A String", # Dataflow service generated name for this source.
510              "sizeBytes": "A String", # Size of the source, if measurable.
511            },
512          ],
513          "componentTransform": [ # Transforms that comprise this execution stage.
514            { # Description of a transform executed as part of an execution stage.
515              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
516              "originalTransform": "A String", # User name for the original user transform with which this transform is
517                  # most closely associated.
518              "name": "A String", # Dataflow service generated name for this transform.
519            },
520          ],
521          "id": "A String", # Dataflow service generated id for this stage.
522        },
523      ],
524    },
525    "steps": [ # The top-level steps that constitute the entire job.
526      { # Defines a particular step within a Cloud Dataflow job.
527          #
528          # A job consists of multiple steps, each of which performs some
529          # specific operation as part of the overall job.  Data is typically
530          # passed from one step to another as part of the job.
531          #
532          # Here's an example of a sequence of steps which together implement a
533          # Map-Reduce job:
534          #
535          #   * Read a collection of data from some source, parsing the
536          #     collection's elements.
537          #
538          #   * Validate the elements.
539          #
540          #   * Apply a user-defined function to map each element to some value
541          #     and extract an element-specific key value.
542          #
543          #   * Group elements with the same key into a single element with
544          #     that key, transforming a multiply-keyed collection into a
545          #     uniquely-keyed collection.
546          #
547          #   * Write the elements out to some data sink.
548          #
549          # Note that the Cloud Dataflow service may be used to run many different
550          # types of jobs, not just Map-Reduce.
551        "kind": "A String", # The kind of step in the Cloud Dataflow job.
552        "properties": { # Named properties associated with the step. Each kind of
553            # predefined step has its own required set of properties.
554            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
555          "a_key": "", # Properties of the object.
556        },
557        "name": "A String", # The name that identifies the step. This must be unique for each
558            # step with respect to all other steps in the Cloud Dataflow job.
559      },
560    ],
561    "currentStateTime": "A String", # The timestamp associated with the current state.
562    "tempFiles": [ # A set of files the system should be aware of that are used
563        # for temporary storage. These temporary files will be
564        # removed on job completion.
565        # No duplicates are allowed.
566        # No file patterns are supported.
567        #
568        # The supported files are:
569        #
570        # Google Cloud Storage:
571        #
572        #    storage.googleapis.com/{bucket}/{object}
573        #    bucket.storage.googleapis.com/{object}
574      "A String",
575    ],
576    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
577        # callers cannot mutate it.
578      { # A message describing the state of a particular execution stage.
579        "executionStageName": "A String", # The name of the execution stage.
580        "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
581        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
582      },
583    ],
584    "type": "A String", # The type of Cloud Dataflow job.
585    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
586        # Cloud Dataflow service.
587    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
588        # of the job it replaced.
589        #
590        # When sending a `CreateJobRequest`, you can update a job by specifying it
591        # here. The job named here is stopped, and its intermediate state is
592        # transferred to this job.
593    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that
594        # isn't contained in the submitted job. # Deprecated.
595      "stages": { # A mapping from each stage to the information about that stage.
596        "a_key": { # Contains information about how a particular
597            # google.dataflow.v1beta3.Step will be executed.
598          "stepName": [ # The steps associated with the execution stage.
599              # Note that stages may have several steps, and that a given step
600              # might be run by more than one stage.
601            "A String",
602          ],
603        },
604      },
605    },
606  }
607
608  x__xgafv: string, V1 error format.
609    Allowed values
610      1 - v1 error format
611      2 - v2 error format
612  replaceJobId: string, Deprecated. This field is now in the Job message.
613  view: string, The level of information requested in response.
614
615Returns:
616  An object of the form:
617
618    { # Defines a job to be run by the Cloud Dataflow service.
619      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
620          # If this field is set, the service will ensure its uniqueness.
621          # The request to create a job will fail if the service has knowledge of a
622          # previously submitted job with the same client's ID and job name.
623          # The caller may use this field to ensure idempotence of job
624          # creation across retried attempts to create a job.
625          # By default, the field is empty and, in that case, the service ignores it.
626      "requestedState": "A String", # The job's requested state.
627          #
628          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
629          # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
630          # also be used to directly set a job's requested state to
631          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
632          # job if it has not already reached a terminal state.
633      "name": "A String", # The user-specified Cloud Dataflow job name.
634          #
635          # Only one Job with a given name may exist in a project at any
636          # given time. If a caller attempts to create a Job with the same
637          # name as an already-existing Job, the attempt returns the
638          # existing Job.
639          #
640          # The name must match the regular expression
641          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
642      "location": "A String", # The location that contains this job.
643      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
644          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
645      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
646      "currentState": "A String", # The current state of the job.
647          #
648          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
649          # specified.
650          #
651          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
652          # terminal state. After a job has reached a terminal state, no
653          # further state updates may be made.
654          #
655          # This field may be mutated by the Cloud Dataflow service;
656          # callers cannot mutate it.
657      "labels": { # User-defined labels for this job.
658          #
659          # The labels map can contain no more than 64 entries.  Entries of the labels
660          # map are UTF8 strings that comply with the following restrictions:
661          #
662          # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
663          # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
664          # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
665          # size.
666        "a_key": "A String",
667      },
668      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
669          # corresponding name prefixes of the new job.
670        "a_key": "A String",
671      },
672      "id": "A String", # The unique ID of this job.
673          #
674          # This field is set by the Cloud Dataflow service when the Job is
675          # created, and is immutable for the life of the job.
676      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
677        "version": { # A structure describing which components and their versions of the service
678            # are required in order to run the job.
679          "a_key": "", # Properties of the object.
680        },
681        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
682            # storage.  The system will append the suffix "/temp-{JOBNAME}" to
683            # this resource prefix, where {JOBNAME} is the value of the
684            # job_name field.  The resulting bucket and object prefix is used
685            # as the prefix of the resources used to store temporary data
686            # needed during the job execution.  NOTE: This will override the
687            # value in taskrunner_settings.
688            # The supported resource type is:
689            #
690            # Google Cloud Storage:
691            #
692            #   storage.googleapis.com/{bucket}/{object}
693            #   bucket.storage.googleapis.com/{object}
694        "internalExperiments": { # Experimental settings.
695          "a_key": "", # Properties of the object. Contains field @type with type URL.
696        },
697        "dataset": "A String", # The dataset for the current project where various workflow
698            # related tables are stored.
699            #
700            # The supported resource type is:
701            #
702            # Google BigQuery:
703            #   bigquery.googleapis.com/{dataset}
704        "experiments": [ # The list of experiments to enable.
705          "A String",
706        ],
707        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
708        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
709            # options are passed through the service and are used to recreate the
710            # SDK pipeline options on the worker in a language agnostic and platform
711            # independent way.
712          "a_key": "", # Properties of the object.
713        },
714        "userAgent": { # A description of the process that generated the request.
715          "a_key": "", # Properties of the object.
716        },
717        "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
718            # unspecified, the service will attempt to choose a reasonable
719            # default.  This should be in the form of the API service name,
720            # e.g. "compute.googleapis.com".
721        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
722            # specified in order for the job to have workers.
723          { # Describes one particular pool of Cloud Dataflow workers to be
724              # instantiated by the Cloud Dataflow service in order to perform the
725              # computations required by a job.  Note that a workflow job may use
726              # multiple pools, in order to match the various computational
727              # requirements of the various stages of the job.
728            "diskSourceImage": "A String", # Fully qualified source image for disks.
729            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
730                # using the standard Dataflow task runner.  Users should ignore
731                # this field.
732              "workflowFileName": "A String", # The file to store the workflow in.
733              "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
734                  # will not be uploaded.
735                  #
736                  # The supported resource type is:
737                  #
738                  # Google Cloud Storage:
739                  #   storage.googleapis.com/{bucket}/{object}
740                  #   bucket.storage.googleapis.com/{object}
741              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
742              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
743                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
744                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
745                    # "shuffle/v1beta1".
746                "workerId": "A String", # The ID of the worker running this pipeline.
747                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
748                    #
749                    # When workers access Google Cloud APIs, they logically do so via
750                    # relative URLs.  If this field is specified, it supplies the base
751                    # URL to use for resolving these relative URLs.  The normative
752                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
753                    # Locators".
754                    #
755                    # If not specified, the default value is "http://www.googleapis.com/"
756                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
757                    # "dataflow/v1b3/projects".
758                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
759                    # storage.
760                    #
761                    # The supported resource type is:
762                    #
763                    # Google Cloud Storage:
764                    #
765                    #   storage.googleapis.com/{bucket}/{object}
766                    #   bucket.storage.googleapis.com/{object}
767              },
768              "vmId": "A String", # The ID string of the VM.
769              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
770              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
771              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
772                  # access the Cloud Dataflow API.
773                "A String",
774              ],
775              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
776                  # taskrunner; e.g. "root".
777              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
778                  #
779                  # When workers access Google Cloud APIs, they logically do so via
780                  # relative URLs.  If this field is specified, it supplies the base
781                  # URL to use for resolving these relative URLs.  The normative
782                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
783                  # Locators".
784                  #
785                  # If not specified, the default value is "http://www.googleapis.com/"
786              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
787                  # taskrunner; e.g. "wheel".
788              "languageHint": "A String", # The suggested backend language.
789              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
790                  # console.
791              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
792              "logDir": "A String", # The directory on the VM to store logs.
793              "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
794              "harnessCommand": "A String", # The command to launch the worker harness.
795              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
796                  # temporary storage.
797                  #
798                  # The supported resource type is:
799                  #
800                  # Google Cloud Storage:
801                  #   storage.googleapis.com/{bucket}/{object}
802                  #   bucket.storage.googleapis.com/{object}
803              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
804            },
805            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
806                # are supported.
807            "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
808                # service will attempt to choose a reasonable default.
809            "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
810                # the service will use the network "default".
811            "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
812                # will attempt to choose a reasonable default.
813            "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
814                # attempt to choose a reasonable default.
815            "dataDisks": [ # Data disks that are used by a VM in this workflow.
816              { # Describes the data disk used by a workflow job.
817                "mountPoint": "A String", # Directory in a VM where disk is mounted.
818                "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
819                    # attempt to choose a reasonable default.
820                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
821                    # must be a disk type appropriate to the project and zone in which
822                    # the workers will run.  If unknown or unspecified, the service
823                    # will attempt to choose a reasonable default.
824                    #
825                    # For example, the standard persistent disk type is a resource name
826                    # typically ending in "pd-standard".  If SSD persistent disks are
827                    # available, the resource name typically ends with "pd-ssd".  The
828                    # actual valid values are defined by the Google Compute Engine API,
829                    # not by the Cloud Dataflow API; consult the Google Compute Engine
830                    # documentation for more information about determining the set of
831                    # available disk types for a particular project and zone.
832                    #
833                    # Google Compute Engine Disk types are local to a particular
834                    # project in a particular zone, and so the resource name will
835                    # typically look something like this:
836                    #
837                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
838              },
839            ],
840            "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
841                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
842                # `TEARDOWN_NEVER`.
843                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
844                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
845                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
846                # down.
847                #
848                # If the workers are not torn down by the service, they will
849                # continue to run and use Google Compute Engine VM resources in the
850                # user's project until they are explicitly terminated by the user.
851                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
852                # policy except for small, manually supervised test jobs.
853                #
854                # If unknown or unspecified, the service will attempt to choose a reasonable
855                # default.
856            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
857                # Compute Engine API.
858            "ipConfiguration": "A String", # Configuration for VM IPs.
859            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
860                # service will choose a number of threads (according to the number of cores
861                # on the selected machine type for batch, or 1 by convention for streaming).
862            "poolArgs": { # Extra arguments for this worker pool.
863              "a_key": "", # Properties of the object. Contains field @type with type URL.
864            },
865            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
866                # execute the job.  If zero or unspecified, the service will
867                # attempt to choose a reasonable default.
868            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
869                # harness, residing in Google Container Registry.
870            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
871                # the form "regions/REGION/subnetworks/SUBNETWORK".
872            "packages": [ # Packages to be installed on workers.
873              { # The packages that must be installed in order for a worker to run the
874                  # steps of the Cloud Dataflow job that will be assigned to its worker
875                  # pool.
876                  #
877                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
878                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
879                  # might use this to install jars containing the user's code and all of the
880                  # various dependencies (libraries, data files, etc.) required in order
881                  # for that code to run.
882                "location": "A String", # The resource to read the package from. The supported resource type is:
883                    #
884                    # Google Cloud Storage:
885                    #
886                    #   storage.googleapis.com/{bucket}
887                    #   bucket.storage.googleapis.com/
888                "name": "A String", # The name of the package.
889              },
890            ],
891            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
892              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
893              "algorithm": "A String", # The algorithm to use for autoscaling.
894            },
895            "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
896                # select a default set of packages which are useful to worker
897                # harnesses written in a particular language.
898            "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
899                # attempt to choose a reasonable default.
900            "metadata": { # Metadata to set on the Google Compute Engine VMs.
901              "a_key": "A String",
902            },
903          },
904        ],
905      },
906      "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
907          # form.  This data is provided by the Dataflow service for ease of visualizing
908          # the pipeline and interpreting Dataflow provided metrics.
909          # Preliminary field: The format of this data may change at any time.
910          # A description of the user pipeline and stages through which it is executed.
911          # Created by Cloud Dataflow service.  Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
912        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
913          { # Description of the type, names/ids, and input/outputs for a transform.
914            "kind": "A String", # Type of transform.
915            "name": "A String", # User provided name for this transform instance.
916            "inputCollectionName": [ # User names for all collection inputs to this transform.
917              "A String",
918            ],
919            "displayData": [ # Transform-specific display data.
920              { # Data provided with a pipeline or transform to provide descriptive info.
921                "shortStrValue": "A String", # A possible additional shorter value to display.
922                    # For example a java_class_name_value of com.mypackage.MyDoFn
923                    # will be stored with MyDoFn as the short_str_value and
924                    # com.mypackage.MyDoFn as the java_class_name value.
925                    # short_str_value can be displayed and java_class_name_value
926                    # will be displayed as a tooltip.
927                "durationValue": "A String", # Contains value if the data is of duration type.
928                "url": "A String", # An optional full URL.
929                "floatValue": 3.14, # Contains value if the data is of float type.
930                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
931                    # language namespace (e.g. a Python module) which defines the display data.
932                    # This allows a dax monitoring system to specially handle the data
933                    # and perform custom rendering.
934                "javaClassValue": "A String", # Contains value if the data is of java class type.
935                "label": "A String", # An optional label to display in a dax UI for the element.
936                "boolValue": True or False, # Contains value if the data is of a boolean type.
937                "strValue": "A String", # Contains value if the data is of string type.
938                "key": "A String", # The key identifying the display data.
939                    # This is intended to be used as a label for the display data
940                    # when viewed in a dax monitoring system.
941                "int64Value": "A String", # Contains value if the data is of int64 type.
942                "timestampValue": "A String", # Contains value if the data is of timestamp type.
943              },
944            ],
945            "outputCollectionName": [ # User names for all collection outputs to this transform.
946              "A String",
947            ],
948            "id": "A String", # SDK generated id of this transform instance.
949          },
950        ],
951        "displayData": [ # Pipeline level display data.
952          { # Data provided with a pipeline or transform to provide descriptive info.
953            "shortStrValue": "A String", # A possible additional shorter value to display.
954                # For example a java_class_name_value of com.mypackage.MyDoFn
955                # will be stored with MyDoFn as the short_str_value and
956                # com.mypackage.MyDoFn as the java_class_name value.
957                # short_str_value can be displayed and java_class_name_value
958                # will be displayed as a tooltip.
959            "durationValue": "A String", # Contains value if the data is of duration type.
960            "url": "A String", # An optional full URL.
961            "floatValue": 3.14, # Contains value if the data is of float type.
962            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
963                # language namespace (e.g. a Python module) which defines the display data.
964                # This allows a dax monitoring system to specially handle the data
965                # and perform custom rendering.
966            "javaClassValue": "A String", # Contains value if the data is of java class type.
967            "label": "A String", # An optional label to display in a dax UI for the element.
968            "boolValue": True or False, # Contains value if the data is of a boolean type.
969            "strValue": "A String", # Contains value if the data is of string type.
970            "key": "A String", # The key identifying the display data.
971                # This is intended to be used as a label for the display data
972                # when viewed in a dax monitoring system.
973            "int64Value": "A String", # Contains value if the data is of int64 type.
974            "timestampValue": "A String", # Contains value if the data is of timestamp type.
975          },
976        ],
977        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
978          { # Description of the composing transforms, names/ids, and input/outputs of a
979              # stage of execution.  Some composing transforms and sources may have been
980              # generated by the Dataflow service during execution planning.
981            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
982              { # Description of an interstitial value between transforms in an execution
983                  # stage.
984                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
985                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
986                    # source is most closely associated.
987                "name": "A String", # Dataflow service generated name for this source.
988              },
989            ],
990            "kind": "A String", # Type of transform this stage is executing.
991            "name": "A String", # Dataflow service generated name for this stage.
992            "outputSource": [ # Output sources for this stage.
993              { # Description of an input or output of an execution stage.
994                "userName": "A String", # Human-readable name for this source; may be user or system generated.
995                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
996                    # source is most closely associated.
997                "name": "A String", # Dataflow service generated name for this source.
998                "sizeBytes": "A String", # Size of the source, if measurable.
999              },
1000            ],
1001            "inputSource": [ # Input sources for this stage.
1002              { # Description of an input or output of an execution stage.
1003                "userName": "A String", # Human-readable name for this source; may be user or system generated.
1004                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1005                    # source is most closely associated.
1006                "name": "A String", # Dataflow service generated name for this source.
1007                "sizeBytes": "A String", # Size of the source, if measurable.
1008              },
1009            ],
1010            "componentTransform": [ # Transforms that comprise this execution stage.
1011              { # Description of a transform executed as part of an execution stage.
1012                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1013                "originalTransform": "A String", # User name for the original user transform with which this transform is
1014                    # most closely associated.
1015                "name": "A String", # Dataflow service generated name for this transform.
1016              },
1017            ],
1018            "id": "A String", # Dataflow service generated id for this stage.
1019          },
1020        ],
1021      },
1022      "steps": [ # The top-level steps that constitute the entire job.
1023        { # Defines a particular step within a Cloud Dataflow job.
1024            #
1025            # A job consists of multiple steps, each of which performs some
1026            # specific operation as part of the overall job.  Data is typically
1027            # passed from one step to another as part of the job.
1028            #
1029            # Here's an example of a sequence of steps which together implement a
1030            # Map-Reduce job:
1031            #
1032            #   * Read a collection of data from some source, parsing the
1033            #     collection's elements.
1034            #
1035            #   * Validate the elements.
1036            #
1037            #   * Apply a user-defined function to map each element to some value
1038            #     and extract an element-specific key value.
1039            #
1040            #   * Group elements with the same key into a single element with
1041            #     that key, transforming a multiply-keyed collection into a
1042            #     uniquely-keyed collection.
1043            #
1044            #   * Write the elements out to some data sink.
1045            #
1046            # Note that the Cloud Dataflow service may be used to run many different
1047            # types of jobs, not just Map-Reduce.
1048          "kind": "A String", # The kind of step in the Cloud Dataflow job.
1049          "properties": { # Named properties associated with the step. Each kind of
1050              # predefined step has its own required set of properties.
1051              # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
1052            "a_key": "", # Properties of the object.
1053          },
1054          "name": "A String", # The name that identifies the step. This must be unique for each
1055              # step with respect to all other steps in the Cloud Dataflow job.
1056        },
1057      ],
1058      "currentStateTime": "A String", # The timestamp associated with the current state.
1059      "tempFiles": [ # A set of files the system should be aware of that are used
1060          # for temporary storage. These temporary files will be
1061          # removed on job completion.
1062          # No duplicates are allowed.
1063          # No file patterns are supported.
1064          #
1065          # The supported files are:
1066          #
1067          # Google Cloud Storage:
1068          #
1069          #    storage.googleapis.com/{bucket}/{object}
1070          #    bucket.storage.googleapis.com/{object}
1071        "A String",
1072      ],
1073      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1074          # callers cannot mutate it.
1075        { # A message describing the state of a particular execution stage.
1076          "executionStageName": "A String", # The name of the execution stage.
1077          "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
1078          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1079        },
1080      ],
1081      "type": "A String", # The type of Cloud Dataflow job.
1082      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1083          # Cloud Dataflow service.
1084      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1085          # of the job it replaced.
1086          #
1087          # When sending a `CreateJobRequest`, you can update a job by specifying it
1088          # here. The job named here is stopped, and its intermediate state is
1089          # transferred to this job.
1090      "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1091          # isn't contained in the submitted job.
1092        "stages": { # A mapping from each stage to the information about that stage.
1093          "a_key": { # Contains information about how a particular
1094              # google.dataflow.v1beta3.Step will be executed.
1095            "stepName": [ # The steps associated with the execution stage.
1096                # Note that stages may have several steps, and that a given step
1097                # might be run by more than one stage.
1098              "A String",
1099            ],
1100          },
1101        },
1102      },
1103    }</pre>
1104</div>
1105
1106<div class="method">
1107    <code class="details" id="get">get(projectId, location, jobId, x__xgafv=None, view=None)</code>
1108  <pre>Gets the state of the specified Cloud Dataflow job.
1109
1110Args:
1111  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
1112  location: string, The location that contains this job. (required)
1113  jobId: string, The job ID. (required)
1114  x__xgafv: string, V1 error format.
1115    Allowed values
1116      1 - v1 error format
1117      2 - v2 error format
1118  view: string, The level of information requested in response.
1119
1120Returns:
1121  An object of the form:
1122
1123    { # Defines a job to be run by the Cloud Dataflow service.
1124      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1125          # If this field is set, the service will ensure its uniqueness.
1126          # The request to create a job will fail if the service has knowledge of a
1127          # previously submitted job with the same client's ID and job name.
1128          # The caller may use this field to ensure idempotence of job
1129          # creation across retried attempts to create a job.
1130          # By default, the field is empty and, in that case, the service ignores it.
1131      "requestedState": "A String", # The job's requested state.
1132          #
1133          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1134          # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
1135          # also be used to directly set a job's requested state to
1136          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1137          # job if it has not already reached a terminal state.
1138      "name": "A String", # The user-specified Cloud Dataflow job name.
1139          #
1140          # Only one Job with a given name may exist in a project at any
1141          # given time. If a caller attempts to create a Job with the same
1142          # name as an already-existing Job, the attempt returns the
1143          # existing Job.
1144          #
1145          # The name must match the regular expression
1146          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1147      "location": "A String", # The location that contains this job.
1148      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1149          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1150      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1151      "currentState": "A String", # The current state of the job.
1152          #
1153          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1154          # specified.
1155          #
1156          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1157          # terminal state. After a job has reached a terminal state, no
1158          # further state updates may be made.
1159          #
1160          # This field may be mutated by the Cloud Dataflow service;
1161          # callers cannot mutate it.
1162      "labels": { # User-defined labels for this job.
1163          #
1164          # The labels map can contain no more than 64 entries.  Entries of the labels
1165          # map are UTF8 strings that comply with the following restrictions:
1166          #
1167          # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
1168          # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1169          # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
1170          # size.
1171        "a_key": "A String",
1172      },
1173      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1174          # corresponding name prefixes of the new job.
1175        "a_key": "A String",
1176      },
1177      "id": "A String", # The unique ID of this job.
1178          #
1179          # This field is set by the Cloud Dataflow service when the Job is
1180          # created, and is immutable for the life of the job.
1181      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1182        "version": { # A structure describing which components and their versions of the service
1183            # are required in order to run the job.
1184          "a_key": "", # Properties of the object.
1185        },
1186        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1187            # storage.  The system will append the suffix "/temp-{JOBNAME}" to
1188            # this resource prefix, where {JOBNAME} is the value of the
1189            # job_name field.  The resulting bucket and object prefix is used
1190            # as the prefix of the resources used to store temporary data
1191            # needed during the job execution.  NOTE: This will override the
1192            # value in taskrunner_settings.
1193            # The supported resource type is:
1194            #
1195            # Google Cloud Storage:
1196            #
1197            #   storage.googleapis.com/{bucket}/{object}
1198            #   bucket.storage.googleapis.com/{object}
1199        "internalExperiments": { # Experimental settings.
1200          "a_key": "", # Properties of the object. Contains field @type with type URL.
1201        },
1202        "dataset": "A String", # The dataset for the current project where various workflow
1203            # related tables are stored.
1204            #
1205            # The supported resource type is:
1206            #
1207            # Google BigQuery:
1208            #   bigquery.googleapis.com/{dataset}
1209        "experiments": [ # The list of experiments to enable.
1210          "A String",
1211        ],
1212        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1213        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1214            # options are passed through the service and are used to recreate the
1215            # SDK pipeline options on the worker in a language agnostic and platform
1216            # independent way.
1217          "a_key": "", # Properties of the object.
1218        },
1219        "userAgent": { # A description of the process that generated the request.
1220          "a_key": "", # Properties of the object.
1221        },
1222        "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
1223            # unspecified, the service will attempt to choose a reasonable
1224            # default.  This should be in the form of the API service name,
1225            # e.g. "compute.googleapis.com".
1226        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1227            # specified in order for the job to have workers.
1228          { # Describes one particular pool of Cloud Dataflow workers to be
1229              # instantiated by the Cloud Dataflow service in order to perform the
1230              # computations required by a job.  Note that a workflow job may use
1231              # multiple pools, in order to match the various computational
1232              # requirements of the various stages of the job.
1233            "diskSourceImage": "A String", # Fully qualified source image for disks.
1234            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1235                # using the standard Dataflow task runner.  Users should ignore
1236                # this field.
1237              "workflowFileName": "A String", # The file to store the workflow in.
1238              "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
1239                  # will not be uploaded.
1240                  #
1241                  # The supported resource type is:
1242                  #
1243                  # Google Cloud Storage:
1244                  #   storage.googleapis.com/{bucket}/{object}
1245                  #   bucket.storage.googleapis.com/{object}
1246              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1247              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1248                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1249                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1250                    # "shuffle/v1beta1".
1251                "workerId": "A String", # The ID of the worker running this pipeline.
1252                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1253                    #
1254                    # When workers access Google Cloud APIs, they logically do so via
1255                    # relative URLs.  If this field is specified, it supplies the base
1256                    # URL to use for resolving these relative URLs.  The normative
1257                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1258                    # Locators".
1259                    #
1260                    # If not specified, the default value is "http://www.googleapis.com/"
1261                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1262                    # "dataflow/v1b3/projects".
1263                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1264                    # storage.
1265                    #
1266                    # The supported resource type is:
1267                    #
1268                    # Google Cloud Storage:
1269                    #
1270                    #   storage.googleapis.com/{bucket}/{object}
1271                    #   bucket.storage.googleapis.com/{object}
1272              },
1273              "vmId": "A String", # The ID string of the VM.
1274              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1275              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1276              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1277                  # access the Cloud Dataflow API.
1278                "A String",
1279              ],
1280              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1281                  # taskrunner; e.g. "root".
1282              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1283                  #
1284                  # When workers access Google Cloud APIs, they logically do so via
1285                  # relative URLs.  If this field is specified, it supplies the base
1286                  # URL to use for resolving these relative URLs.  The normative
1287                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1288                  # Locators".
1289                  #
1290                  # If not specified, the default value is "http://www.googleapis.com/"
1291              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1292                  # taskrunner; e.g. "wheel".
1293              "languageHint": "A String", # The suggested backend language.
1294              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1295                  # console.
1296              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1297              "logDir": "A String", # The directory on the VM to store logs.
1298              "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1299              "harnessCommand": "A String", # The command to launch the worker harness.
1300              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1301                  # temporary storage.
1302                  #
1303                  # The supported resource type is:
1304                  #
1305                  # Google Cloud Storage:
1306                  #   storage.googleapis.com/{bucket}/{object}
1307                  #   bucket.storage.googleapis.com/{object}
1308              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1309            },
1310            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1311                # are supported.
1312            "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
1313                # service will attempt to choose a reasonable default.
1314            "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
1315                # the service will use the network "default".
1316            "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
1317                # will attempt to choose a reasonable default.
1318            "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
1319                # attempt to choose a reasonable default.
1320            "dataDisks": [ # Data disks that are used by a VM in this workflow.
1321              { # Describes the data disk used by a workflow job.
1322                "mountPoint": "A String", # Directory in a VM where disk is mounted.
1323                "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
1324                    # attempt to choose a reasonable default.
1325                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
1326                    # must be a disk type appropriate to the project and zone in which
1327                    # the workers will run.  If unknown or unspecified, the service
1328                    # will attempt to choose a reasonable default.
1329                    #
1330                    # For example, the standard persistent disk type is a resource name
1331                    # typically ending in "pd-standard".  If SSD persistent disks are
1332                    # available, the resource name typically ends with "pd-ssd".  The
1333                    # actual valid values are defined by the Google Compute Engine API,
1334                    # not by the Cloud Dataflow API; consult the Google Compute Engine
1335                    # documentation for more information about determining the set of
1336                    # available disk types for a particular project and zone.
1337                    #
1338                    # Google Compute Engine Disk types are local to a particular
1339                    # project in a particular zone, and so the resource name will
1340                    # typically look something like this:
1341                    #
1342                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1343              },
1344            ],
1345            "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
1346                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1347                # `TEARDOWN_NEVER`.
1348                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1349                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1350                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1351                # down.
1352                #
1353                # If the workers are not torn down by the service, they will
1354                # continue to run and use Google Compute Engine VM resources in the
1355                # user's project until they are explicitly terminated by the user.
1356                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1357                # policy except for small, manually supervised test jobs.
1358                #
1359                # If unknown or unspecified, the service will attempt to choose a reasonable
1360                # default.
1361            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1362                # Compute Engine API.
1363            "ipConfiguration": "A String", # Configuration for VM IPs.
1364            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1365                # service will choose a number of threads (according to the number of cores
1366                # on the selected machine type for batch, or 1 by convention for streaming).
1367            "poolArgs": { # Extra arguments for this worker pool.
1368              "a_key": "", # Properties of the object. Contains field @type with type URL.
1369            },
1370            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1371                # execute the job.  If zero or unspecified, the service will
1372                # attempt to choose a reasonable default.
1373            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1374                # harness, residing in Google Container Registry.
1375            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
1376                # the form "regions/REGION/subnetworks/SUBNETWORK".
1377            "packages": [ # Packages to be installed on workers.
1378              { # The packages that must be installed in order for a worker to run the
1379                  # steps of the Cloud Dataflow job that will be assigned to its worker
1380                  # pool.
1381                  #
1382                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
1383                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1384                  # might use this to install jars containing the user's code and all of the
1385                  # various dependencies (libraries, data files, etc.) required in order
1386                  # for that code to run.
1387                "location": "A String", # The resource to read the package from. The supported resource type is:
1388                    #
1389                    # Google Cloud Storage:
1390                    #
1391                    #   storage.googleapis.com/{bucket}
1392                    #   bucket.storage.googleapis.com/
1393                "name": "A String", # The name of the package.
1394              },
1395            ],
1396            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1397              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1398              "algorithm": "A String", # The algorithm to use for autoscaling.
1399            },
1400            "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
1401                # select a default set of packages which are useful to worker
1402                # harnesses written in a particular language.
1403            "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
1404                # attempt to choose a reasonable default.
1405            "metadata": { # Metadata to set on the Google Compute Engine VMs.
1406              "a_key": "A String",
1407            },
1408          },
1409        ],
1410      },
1411      "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
1412          # A description of the user pipeline and stages through which it is executed.
1413          # Created by Cloud Dataflow service.  Only retrieved with
1414          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1415          # form.  This data is provided by the Dataflow service for ease of visualizing
1416          # the pipeline and interpreting Dataflow provided metrics.
1417        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1418          { # Description of the type, names/ids, and input/outputs for a transform.
1419            "kind": "A String", # Type of transform.
1420            "name": "A String", # User provided name for this transform instance.
1421            "inputCollectionName": [ # User names for all collection inputs to this transform.
1422              "A String",
1423            ],
1424            "displayData": [ # Transform-specific display data.
1425              { # Data provided with a pipeline or transform to provide descriptive info.
1426                "shortStrValue": "A String", # A possible additional shorter value to display.
1427                    # For example a java_class_name_value of com.mypackage.MyDoFn
1428                    # will be stored with MyDoFn as the short_str_value and
1429                    # com.mypackage.MyDoFn as the java_class_name value.
1430                    # short_str_value can be displayed and java_class_name_value
1431                    # will be displayed as a tooltip.
1432                "durationValue": "A String", # Contains value if the data is of duration type.
1433                "url": "A String", # An optional full URL.
1434                "floatValue": 3.14, # Contains value if the data is of float type.
1435                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1436                    # language namespace (e.g. a Python module) which defines the display data.
1437                    # This allows a dax monitoring system to specially handle the data
1438                    # and perform custom rendering.
1439                "javaClassValue": "A String", # Contains value if the data is of java class type.
1440                "label": "A String", # An optional label to display in a dax UI for the element.
1441                "boolValue": True or False, # Contains value if the data is of a boolean type.
1442                "strValue": "A String", # Contains value if the data is of string type.
1443                "key": "A String", # The key identifying the display data.
1444                    # This is intended to be used as a label for the display data
1445                    # when viewed in a dax monitoring system.
1446                "int64Value": "A String", # Contains value if the data is of int64 type.
1447                "timestampValue": "A String", # Contains value if the data is of timestamp type.
1448              },
1449            ],
1450            "outputCollectionName": [ # User names for all collection outputs to this transform.
1451              "A String",
1452            ],
1453            "id": "A String", # SDK generated id of this transform instance.
1454          },
1455        ],
1456        "displayData": [ # Pipeline level display data.
1457          { # Data provided with a pipeline or transform to provide descriptive info.
1458            "shortStrValue": "A String", # A possible additional shorter value to display.
1459                # For example a java_class_name_value of com.mypackage.MyDoFn
1460                # will be stored with MyDoFn as the short_str_value and
1461                # com.mypackage.MyDoFn as the java_class_name value.
1462                # short_str_value can be displayed and java_class_name_value
1463                # will be displayed as a tooltip.
1464            "durationValue": "A String", # Contains value if the data is of duration type.
1465            "url": "A String", # An optional full URL.
1466            "floatValue": 3.14, # Contains value if the data is of float type.
1467            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1468                # language namespace (e.g. a Python module) which defines the display data.
1469                # This allows a dax monitoring system to specially handle the data
1470                # and perform custom rendering.
1471            "javaClassValue": "A String", # Contains value if the data is of java class type.
1472            "label": "A String", # An optional label to display in a dax UI for the element.
1473            "boolValue": True or False, # Contains value if the data is of a boolean type.
1474            "strValue": "A String", # Contains value if the data is of string type.
1475            "key": "A String", # The key identifying the display data.
1476                # This is intended to be used as a label for the display data
1477                # when viewed in a dax monitoring system.
1478            "int64Value": "A String", # Contains value if the data is of int64 type.
1479            "timestampValue": "A String", # Contains value if the data is of timestamp type.
1480          },
1481        ],
1482        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1483          { # Description of the composing transforms, names/ids, and input/outputs of a
1484              # stage of execution.  Some composing transforms and sources may have been
1485              # generated by the Dataflow service during execution planning.
1486            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1487              { # Description of an interstitial value between transforms in an execution
1488                  # stage.
1489                "userName": "A String", # Human-readable name for this source; may be user or system generated.
1490                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1491                    # source is most closely associated.
1492                "name": "A String", # Dataflow service generated name for this source.
1493              },
1494            ],
1495            "kind": "A String", # Type of transform this stage is executing.
1496            "name": "A String", # Dataflow service generated name for this stage.
1497            "outputSource": [ # Output sources for this stage.
1498              { # Description of an input or output of an execution stage.
1499                "userName": "A String", # Human-readable name for this source; may be user or system generated.
1500                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1501                    # source is most closely associated.
1502                "name": "A String", # Dataflow service generated name for this source.
1503                "sizeBytes": "A String", # Size of the source, if measurable.
1504              },
1505            ],
1506            "inputSource": [ # Input sources for this stage.
1507              { # Description of an input or output of an execution stage.
1508                "userName": "A String", # Human-readable name for this source; may be user or system generated.
1509                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1510                    # source is most closely associated.
1511                "name": "A String", # Dataflow service generated name for this source.
1512                "sizeBytes": "A String", # Size of the source, if measurable.
1513              },
1514            ],
1515            "componentTransform": [ # Transforms that comprise this execution stage.
1516              { # Description of a transform executed as part of an execution stage.
1517                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1518                "originalTransform": "A String", # User name for the original user transform with which this transform is
1519                    # most closely associated.
1520                "name": "A String", # Dataflow service generated name for this transform.
1521              },
1522            ],
1523            "id": "A String", # Dataflow service generated id for this stage.
1524          },
1525        ],
1526      },
1527      "steps": [ # The top-level steps that constitute the entire job.
1528        { # Defines a particular step within a Cloud Dataflow job.
1529            #
1530            # A job consists of multiple steps, each of which performs some
1531            # specific operation as part of the overall job.  Data is typically
1532            # passed from one step to another as part of the job.
1533            #
1534            # Here's an example of a sequence of steps which together implement a
1535            # Map-Reduce job:
1536            #
1537            #   * Read a collection of data from some source, parsing the
1538            #     collection's elements.
1539            #
1540            #   * Validate the elements.
1541            #
1542            #   * Apply a user-defined function to map each element to some value
1543            #     and extract an element-specific key value.
1544            #
1545            #   * Group elements with the same key into a single element with
1546            #     that key, transforming a multiply-keyed collection into a
1547            #     uniquely-keyed collection.
1548            #
1549            #   * Write the elements out to some data sink.
1550            #
1551            # Note that the Cloud Dataflow service may be used to run many different
1552            # types of jobs, not just Map-Reduce.
1553          "kind": "A String", # The kind of step in the Cloud Dataflow job.
1554          "properties": { # Named properties associated with the step. Each kind of
1555              # predefined step has its own required set of properties.
1556              # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
1557            "a_key": "", # Properties of the object.
1558          },
1559          "name": "A String", # The name that identifies the step. This must be unique for each
1560              # step with respect to all other steps in the Cloud Dataflow job.
1561        },
1562      ],
1563      "currentStateTime": "A String", # The timestamp associated with the current state.
1564      "tempFiles": [ # A set of files the system should be aware of that are used
1565          # for temporary storage. These temporary files will be
1566          # removed on job completion.
1567          # No duplicates are allowed.
1568          # No file patterns are supported.
1569          #
1570          # The supported files are:
1571          #
1572          # Google Cloud Storage:
1573          #
1574          #    storage.googleapis.com/{bucket}/{object}
1575          #    bucket.storage.googleapis.com/{object}
1576        "A String",
1577      ],
1578      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1579          # callers cannot mutate it.
1580        { # A message describing the state of a particular execution stage.
1581          "executionStageName": "A String", # The name of the execution stage.
1582          "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
1583          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1584        },
1585      ],
1586      "type": "A String", # The type of Cloud Dataflow job.
1587      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1588          # Cloud Dataflow service.
1589      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1590          # of the job it replaced.
1591          #
1592          # When sending a `CreateJobRequest`, you can update a job by specifying it
1593          # here. The job named here is stopped, and its intermediate state is
1594          # transferred to this job.
1595      "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job will be
1596          # executed that isn't contained in the submitted job.
1597        "stages": { # A mapping from each stage to the information about that stage.
1598          "a_key": { # Contains information about how a particular
1599              # google.dataflow.v1beta3.Step will be executed.
1600            "stepName": [ # The steps associated with the execution stage.
1601                # Note that stages may have several steps, and that a given step
1602                # might be run by more than one stage.
1603              "A String",
1604            ],
1605          },
1606        },
1607      },
1608    }</pre>
1609</div>
1610
1611<div class="method">
1612    <code class="details" id="getMetrics">getMetrics(projectId, location, jobId, startTime=None, x__xgafv=None)</code>
1613  <pre>Request the job status.
1614
1615Args:
1616  projectId: string, A project id. (required)
1617  location: string, The location which contains the job specified by job_id. (required)
1618  jobId: string, The job to get metrics for. (required)
1619  startTime: string, Return only metric data that has changed since this time.
1620Default is to return all information about all metrics for the job.
1621  x__xgafv: string, V1 error format.
1622    Allowed values
1623      1 - v1 error format
1624      2 - v2 error format
1625
1626Returns:
1627  An object of the form:
1628
1629    { # JobMetrics contains a collection of metrics describing the detailed progress
1630      # of a Dataflow job. Metrics correspond to user-defined and system-defined
1631      # metrics in the job.
1632      #
1633      # This resource captures only the most recent values of each metric;
1634      # time-series data can be queried for them (under the same metric names)
1635      # from Cloud Monitoring.
1636    "metrics": [ # All metrics for this job.
1637      { # Describes the state of a metric.
1638        "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1639            # This holds the count of the aggregated values and is used in combination
1640            # with mean_sum to obtain the actual mean aggregate value.
1641            # The only possible value type is Long.
1642        "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
1643            # reporting work progress; it will be filled in responses from the
1644            # metrics API.
1645        "set": "", # Worker-computed aggregate value for the "Set" aggregation kind.  The only
1646            # possible value type is a list of Values whose type can be Long, Double,
1647            # or String, according to the metric's type.  All Values in the list must
1648            # be of the same type.
1649        "name": { # Name of the metric. Identifies a metric by describing the source which
1650            # generated the metric.
1651          "origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics;
1652              # will be "dataflow" for metrics defined by the Dataflow service or SDK.
1653          "name": "A String", # Worker-defined metric name.
1654          "context": { # Zero or more labeled fields which identify the part of the job this
1655              # metric is associated with, such as the name of a step or collection.
1656              #
1657              # For example, built-in counters associated with steps will have
1658              # context['step'] = &lt;step-name&gt;. Counters associated with PCollections
1659              # in the SDK will have context['pcollection'] = &lt;pcollection-name&gt;.
1660            "a_key": "A String",
1661          },
1662        },
1663        "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
1664            # value accumulated since the worker started working on this WorkItem.
1665            # By default this is false, indicating that this metric is reported
1666            # as a delta that is not associated with any WorkItem.
1667        "kind": "A String", # Metric aggregation kind.  The possible metric aggregation kinds are
1668            # "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution".
1669            # The specified aggregation kind is case-insensitive.
1670            #
1671            # If omitted, this is not an aggregated value but instead
1672            # a single metric sample value.
1673        "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
1674            # "And", and "Or".  The possible value types are Long, Double, and Boolean.
1675        "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1676            # This holds the sum of the aggregated values and is used in combination
1677            # with mean_count to obtain the actual mean aggregate value.
1678            # The only possible value types are Long and Double.
1679        "distribution": "", # A struct value describing properties of a distribution of numeric values.
1680        "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
1681            # service.
1682      },
1683    ],
1684    "metricTime": "A String", # Timestamp as of which metric values are current.
1685  }</pre>
1686</div>
1687
1688<div class="method">
1689    <code class="details" id="list">list(projectId, location, pageSize=None, x__xgafv=None, pageToken=None, filter=None, view=None)</code>
1690  <pre>List the jobs of a project.
1691
1692Args:
1693  projectId: string, The project which owns the jobs. (required)
1694  location: string, The location that contains this job. (required)
1695  pageSize: integer, If there are many jobs, limit response to at most this many.
1696The actual number of jobs returned will be the lesser of max_responses
1697and an unspecified server-defined limit.
1698  x__xgafv: string, V1 error format.
1699    Allowed values
1700      1 - v1 error format
1701      2 - v2 error format
1702  pageToken: string, Set this to the 'next_page_token' field of a previous response
1703to request additional results in a long list.
1704  filter: string, The kind of filter to use.
1705  view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.
1706
1707Returns:
1708  An object of the form:
1709
1710    { # Response to a request to list Cloud Dataflow jobs.  This may be a partial
1711      # response, depending on the page size in the ListJobsRequest.
1712    "nextPageToken": "A String", # Set if there may be more results than fit in this response.
1713    "failedLocation": [ # Zero or more messages describing locations that failed to respond.
1714      { # Indicates which location failed to respond to a request for data.
1715        "name": "A String", # The name of the failed location.
1716      },
1717    ],
1718    "jobs": [ # A subset of the requested job information.
1719      { # Defines a job to be run by the Cloud Dataflow service.
1720          "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1721              # If this field is set, the service will ensure its uniqueness.
1722              # The request to create a job will fail if the service has knowledge of a
1723              # previously submitted job with the same client's ID and job name.
1724              # The caller may use this field to ensure idempotence of job
1725              # creation across retried attempts to create a job.
1726              # By default, the field is empty and, in that case, the service ignores it.
1727          "requestedState": "A String", # The job's requested state.
1728              #
1729              # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1730              # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
1731              # also be used to directly set a job's requested state to
1732              # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1733              # job if it has not already reached a terminal state.
1734          "name": "A String", # The user-specified Cloud Dataflow job name.
1735              #
1736              # Only one Job with a given name may exist in a project at any
1737              # given time. If a caller attempts to create a Job with the same
1738              # name as an already-existing Job, the attempt returns the
1739              # existing Job.
1740              #
1741              # The name must match the regular expression
1742              # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1743          "location": "A String", # The location that contains this job.
1744          "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1745              # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1746          "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1747          "currentState": "A String", # The current state of the job.
1748              #
1749              # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1750              # specified.
1751              #
1752              # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1753              # terminal state. After a job has reached a terminal state, no
1754              # further state updates may be made.
1755              #
1756              # This field may be mutated by the Cloud Dataflow service;
1757              # callers cannot mutate it.
1758          "labels": { # User-defined labels for this job.
1759              #
1760              # The labels map can contain no more than 64 entries.  Entries of the labels
1761              # map are UTF8 strings that comply with the following restrictions:
1762              #
1763              # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
1764              # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1765              # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
1766              # size.
1767            "a_key": "A String",
1768          },
1769          "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1770              # corresponding name prefixes of the new job.
1771            "a_key": "A String",
1772          },
1773          "id": "A String", # The unique ID of this job.
1774              #
1775              # This field is set by the Cloud Dataflow service when the Job is
1776              # created, and is immutable for the life of the job.
1777          "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1778            "version": { # A structure describing which components and their versions of the service
1779                # are required in order to run the job.
1780              "a_key": "", # Properties of the object.
1781            },
1782            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1783                # storage.  The system will append the suffix "/temp-{JOBNAME}" to
1784                # this resource prefix, where {JOBNAME} is the value of the
1785                # job_name field.  The resulting bucket and object prefix is used
1786                # as the prefix of the resources used to store temporary data
1787                # needed during the job execution.  NOTE: This will override the
1788                # value in taskrunner_settings.
1789                # The supported resource type is:
1790                #
1791                # Google Cloud Storage:
1792                #
1793                #   storage.googleapis.com/{bucket}/{object}
1794                #   bucket.storage.googleapis.com/{object}
1795            "internalExperiments": { # Experimental settings.
1796              "a_key": "", # Properties of the object. Contains field @type with type URL.
1797            },
1798            "dataset": "A String", # The dataset for the current project where various workflow
1799                # related tables are stored.
1800                #
1801                # The supported resource type is:
1802                #
1803                # Google BigQuery:
1804                #   bigquery.googleapis.com/{dataset}
1805            "experiments": [ # The list of experiments to enable.
1806              "A String",
1807            ],
1808            "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1809            "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1810                # options are passed through the service and are used to recreate the
1811                # SDK pipeline options on the worker in a language agnostic and platform
1812                # independent way.
1813              "a_key": "", # Properties of the object.
1814            },
1815            "userAgent": { # A description of the process that generated the request.
1816              "a_key": "", # Properties of the object.
1817            },
1818            "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
1819                # unspecified, the service will attempt to choose a reasonable
1820                # default.  This should be in the form of the API service name,
1821                # e.g. "compute.googleapis.com".
1822            "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1823                # specified in order for the job to have workers.
1824              { # Describes one particular pool of Cloud Dataflow workers to be
1825                  # instantiated by the Cloud Dataflow service in order to perform the
1826                  # computations required by a job.  Note that a workflow job may use
1827                  # multiple pools, in order to match the various computational
1828                  # requirements of the various stages of the job.
1829                "diskSourceImage": "A String", # Fully qualified source image for disks.
1830                "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1831                    # using the standard Dataflow task runner.  Users should ignore
1832                    # this field.
1833                  "workflowFileName": "A String", # The file to store the workflow in.
1834                  "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
1835                      # will not be uploaded.
1836                      #
1837                      # The supported resource type is:
1838                      #
1839                      # Google Cloud Storage:
1840                      #   storage.googleapis.com/{bucket}/{object}
1841                      #   bucket.storage.googleapis.com/{object}
1842                  "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1843                  "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1844                    "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1845                    "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1846                        # "shuffle/v1beta1".
1847                    "workerId": "A String", # The ID of the worker running this pipeline.
1848                    "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1849                        #
1850                        # When workers access Google Cloud APIs, they logically do so via
1851                        # relative URLs.  If this field is specified, it supplies the base
1852                        # URL to use for resolving these relative URLs.  The normative
1853                        # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1854                        # Locators".
1855                        #
1856                        # If not specified, the default value is "http://www.googleapis.com/"
1857                    "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1858                        # "dataflow/v1b3/projects".
1859                    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1860                        # storage.
1861                        #
1862                        # The supported resource type is:
1863                        #
1864                        # Google Cloud Storage:
1865                        #
1866                        #   storage.googleapis.com/{bucket}/{object}
1867                        #   bucket.storage.googleapis.com/{object}
1868                  },
1869                  "vmId": "A String", # The ID string of the VM.
1870                  "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1871                  "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1872                  "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1873                      # access the Cloud Dataflow API.
1874                    "A String",
1875                  ],
1876                  "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1877                      # taskrunner; e.g. "root".
1878                  "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1879                      #
1880                      # When workers access Google Cloud APIs, they logically do so via
1881                      # relative URLs.  If this field is specified, it supplies the base
1882                      # URL to use for resolving these relative URLs.  The normative
1883                      # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1884                      # Locators".
1885                      #
1886                      # If not specified, the default value is "http://www.googleapis.com/"
1887                  "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1888                      # taskrunner; e.g. "wheel".
1889                  "languageHint": "A String", # The suggested backend language.
1890                  "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1891                      # console.
1892                  "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1893                  "logDir": "A String", # The directory on the VM to store logs.
1894                  "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1895                  "harnessCommand": "A String", # The command to launch the worker harness.
1896                  "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1897                      # temporary storage.
1898                      #
1899                      # The supported resource type is:
1900                      #
1901                      # Google Cloud Storage:
1902                      #   storage.googleapis.com/{bucket}/{object}
1903                      #   bucket.storage.googleapis.com/{object}
1904                  "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1905                },
1906                "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1907                    # are supported.
1908                "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
1909                    # service will attempt to choose a reasonable default.
1910                "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
1911                    # the service will use the network "default".
1912                "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
1913                    # will attempt to choose a reasonable default.
1914                "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
1915                    # attempt to choose a reasonable default.
1916                "dataDisks": [ # Data disks that are used by a VM in this workflow.
1917                  { # Describes the data disk used by a workflow job.
1918                    "mountPoint": "A String", # Directory in a VM where disk is mounted.
1919                    "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
1920                        # attempt to choose a reasonable default.
1921                    "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
1922                        # must be a disk type appropriate to the project and zone in which
1923                        # the workers will run.  If unknown or unspecified, the service
1924                        # will attempt to choose a reasonable default.
1925                        #
1926                        # For example, the standard persistent disk type is a resource name
1927                        # typically ending in "pd-standard".  If SSD persistent disks are
1928                        # available, the resource name typically ends with "pd-ssd".  The
1929                        # actual valid values are defined by the Google Compute Engine API,
1930                        # not by the Cloud Dataflow API; consult the Google Compute Engine
1931                        # documentation for more information about determining the set of
1932                        # available disk types for a particular project and zone.
1933                        #
1934                        # Google Compute Engine Disk types are local to a particular
1935                        # project in a particular zone, and so the resource name will
1936                        # typically look something like this:
1937                        #
1938                        # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1939                  },
1940                ],
1941                "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
1942                    # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1943                    # `TEARDOWN_NEVER`.
1944                    # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1945                    # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1946                    # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1947                    # down.
1948                    #
1949                    # If the workers are not torn down by the service, they will
1950                    # continue to run and use Google Compute Engine VM resources in the
1951                    # user's project until they are explicitly terminated by the user.
1952                    # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1953                    # policy except for small, manually supervised test jobs.
1954                    #
1955                    # If unknown or unspecified, the service will attempt to choose a reasonable
1956                    # default.
1957                "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1958                    # Compute Engine API.
1959                "ipConfiguration": "A String", # Configuration for VM IPs.
1960                "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1961                    # service will choose a number of threads (according to the number of cores
1962                    # on the selected machine type for batch, or 1 by convention for streaming).
1963                "poolArgs": { # Extra arguments for this worker pool.
1964                  "a_key": "", # Properties of the object. Contains field @type with type URL.
1965                },
1966                "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1967                    # execute the job.  If zero or unspecified, the service will
1968                    # attempt to choose a reasonable default.
1969                "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1970                    # harness, residing in Google Container Registry.
1971                "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
1972                    # the form "regions/REGION/subnetworks/SUBNETWORK".
1973                "packages": [ # Packages to be installed on workers.
1974                  { # The packages that must be installed in order for a worker to run the
1975                      # steps of the Cloud Dataflow job that will be assigned to its worker
1976                      # pool.
1977                      #
1978                      # This is the mechanism by which the Cloud Dataflow SDK causes code to
1979                      # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1980                      # might use this to install jars containing the user's code and all of the
1981                      # various dependencies (libraries, data files, etc.) required in order
1982                      # for that code to run.
1983                    "location": "A String", # The resource to read the package from. The supported resource type is:
1984                        #
1985                        # Google Cloud Storage:
1986                        #
1987                        #   storage.googleapis.com/{bucket}
1988                        #   bucket.storage.googleapis.com/
1989                    "name": "A String", # The name of the package.
1990                  },
1991                ],
1992                "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1993                  "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1994                  "algorithm": "A String", # The algorithm to use for autoscaling.
1995                },
1996                "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
1997                    # select a default set of packages which are useful to worker
1998                    # harnesses written in a particular language.
1999                "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
2000                    # attempt to choose a reasonable default.
2001                "metadata": { # Metadata to set on the Google Compute Engine VMs.
2002                  "a_key": "A String",
2003                },
2004              },
2005            ],
2006          },
2007          "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
2008              # form.  This data is provided by the Dataflow service for ease of visualizing
2009              # the pipeline and interpreting Dataflow provided metrics.
2010              # Preliminary field: The format of this data may change at any time.
2011              # A description of the user pipeline and stages through which it is executed.
2012              # Created by Cloud Dataflow service.  Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2013            "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2014              { # Description of the type, names/ids, and input/outputs for a transform.
2015                "kind": "A String", # Type of transform.
2016                "name": "A String", # User provided name for this transform instance.
2017                "inputCollectionName": [ # User names for all collection inputs to this transform.
2018                  "A String",
2019                ],
2020                "displayData": [ # Transform-specific display data.
2021                  { # Data provided with a pipeline or transform to provide descriptive info.
2022                    "shortStrValue": "A String", # A possible additional shorter value to display.
2023                        # For example a java_class_name_value of com.mypackage.MyDoFn
2024                        # will be stored with MyDoFn as the short_str_value and
2025                        # com.mypackage.MyDoFn as the java_class_name value.
2026                        # short_str_value can be displayed and java_class_name_value
2027                        # will be displayed as a tooltip.
2028                    "durationValue": "A String", # Contains value if the data is of duration type.
2029                    "url": "A String", # An optional full URL.
2030                    "floatValue": 3.14, # Contains value if the data is of float type.
2031                    "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2032                        # language namespace (i.e. python module) which defines the display data.
2033                        # This allows a dax monitoring system to specially handle the data
2034                        # and perform custom rendering.
2035                    "javaClassValue": "A String", # Contains value if the data is of java class type.
2036                    "label": "A String", # An optional label to display in a dax UI for the element.
2037                    "boolValue": True or False, # Contains value if the data is of a boolean type.
2038                    "strValue": "A String", # Contains value if the data is of string type.
2039                    "key": "A String", # The key identifying the display data.
2040                        # This is intended to be used as a label for the display data
2041                        # when viewed in a dax monitoring system.
2042                    "int64Value": "A String", # Contains value if the data is of int64 type.
2043                    "timestampValue": "A String", # Contains value if the data is of timestamp type.
2044                  },
2045                ],
2046                "outputCollectionName": [ # User names for all collection outputs to this transform.
2047                  "A String",
2048                ],
2049                "id": "A String", # SDK generated id of this transform instance.
2050              },
2051            ],
2052            "displayData": [ # Pipeline level display data.
2053              { # Data provided with a pipeline or transform to provide descriptive info.
2054                "shortStrValue": "A String", # A possible additional shorter value to display.
2055                    # For example a java_class_name_value of com.mypackage.MyDoFn
2056                    # will be stored with MyDoFn as the short_str_value and
2057                    # com.mypackage.MyDoFn as the java_class_name value.
2058                    # short_str_value can be displayed and java_class_name_value
2059                    # will be displayed as a tooltip.
2060                "durationValue": "A String", # Contains value if the data is of duration type.
2061                "url": "A String", # An optional full URL.
2062                "floatValue": 3.14, # Contains value if the data is of float type.
2063                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2064                    # language namespace (i.e. python module) which defines the display data.
2065                    # This allows a dax monitoring system to specially handle the data
2066                    # and perform custom rendering.
2067                "javaClassValue": "A String", # Contains value if the data is of java class type.
2068                "label": "A String", # An optional label to display in a dax UI for the element.
2069                "boolValue": True or False, # Contains value if the data is of a boolean type.
2070                "strValue": "A String", # Contains value if the data is of string type.
2071                "key": "A String", # The key identifying the display data.
2072                    # This is intended to be used as a label for the display data
2073                    # when viewed in a dax monitoring system.
2074                "int64Value": "A String", # Contains value if the data is of int64 type.
2075                "timestampValue": "A String", # Contains value if the data is of timestamp type.
2076              },
2077            ],
2078            "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2079              { # Description of the composing transforms, names/ids, and input/outputs of a
2080                  # stage of execution.  Some composing transforms and sources may have been
2081                  # generated by the Dataflow service during execution planning.
2082                "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2083                  { # Description of an interstitial value between transforms in an execution
2084                      # stage.
2085                    "userName": "A String", # Human-readable name for this source; may be user or system generated.
2086                    "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2087                        # source is most closely associated.
2088                    "name": "A String", # Dataflow service generated name for this source.
2089                  },
2090                ],
2091                "kind": "A String", # Type of transform this stage is executing.
2092                "name": "A String", # Dataflow service generated name for this stage.
2093                "outputSource": [ # Output sources for this stage.
2094                  { # Description of an input or output of an execution stage.
2095                    "userName": "A String", # Human-readable name for this source; may be user or system generated.
2096                    "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2097                        # source is most closely associated.
2098                    "name": "A String", # Dataflow service generated name for this source.
2099                    "sizeBytes": "A String", # Size of the source, if measurable.
2100                  },
2101                ],
2102                "inputSource": [ # Input sources for this stage.
2103                  { # Description of an input or output of an execution stage.
2104                    "userName": "A String", # Human-readable name for this source; may be user or system generated.
2105                    "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2106                        # source is most closely associated.
2107                    "name": "A String", # Dataflow service generated name for this source.
2108                    "sizeBytes": "A String", # Size of the source, if measurable.
2109                  },
2110                ],
2111                "componentTransform": [ # Transforms that comprise this execution stage.
2112                  { # Description of a transform executed as part of an execution stage.
2113                    "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2114                    "originalTransform": "A String", # User name for the original user transform with which this transform is
2115                        # most closely associated.
2116                    "name": "A String", # Dataflow service generated name for this transform.
2117                  },
2118                ],
2119                "id": "A String", # Dataflow service generated id for this stage.
2120              },
2121            ],
2122          },
2123          "steps": [ # The top-level steps that constitute the entire job.
2124            { # Defines a particular step within a Cloud Dataflow job.
2125                #
2126                # A job consists of multiple steps, each of which performs some
2127                # specific operation as part of the overall job.  Data is typically
2128                # passed from one step to another as part of the job.
2129                #
2130                # Here's an example of a sequence of steps which together implement a
2131                # Map-Reduce job:
2132                #
2133                #   * Read a collection of data from some source, parsing the
2134                #     collection's elements.
2135                #
2136                #   * Validate the elements.
2137                #
2138                #   * Apply a user-defined function to map each element to some value
2139                #     and extract an element-specific key value.
2140                #
2141                #   * Group elements with the same key into a single element with
2142                #     that key, transforming a multiply-keyed collection into a
2143                #     uniquely-keyed collection.
2144                #
2145                #   * Write the elements out to some data sink.
2146                #
2147                # Note that the Cloud Dataflow service may be used to run many different
2148                # types of jobs, not just Map-Reduce.
2149              "kind": "A String", # The kind of step in the Cloud Dataflow job.
2150              "properties": { # Named properties associated with the step. Each kind of
2151                  # predefined step has its own required set of properties.
2152                  # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
2153                "a_key": "", # Properties of the object.
2154              },
2155              "name": "A String", # The name that identifies the step. This must be unique for each
2156                  # step with respect to all other steps in the Cloud Dataflow job.
2157            },
2158          ],
2159          "currentStateTime": "A String", # The timestamp associated with the current state.
2160          "tempFiles": [ # A set of files the system should be aware of that are used
2161              # for temporary storage. These temporary files will be
2162              # removed on job completion.
2163              # No duplicates are allowed.
2164              # No file patterns are supported.
2165              #
2166              # The supported files are:
2167              #
2168              # Google Cloud Storage:
2169              #
2170              #    storage.googleapis.com/{bucket}/{object}
2171              #    bucket.storage.googleapis.com/{object}
2172            "A String",
2173          ],
2174          "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
2175              # callers cannot mutate it.
2176            { # A message describing the state of a particular execution stage.
2177              "executionStageName": "A String", # The name of the execution stage.
2178              "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
2179              "currentStateTime": "A String", # The time at which the stage transitioned to this state.
2180            },
2181          ],
2182          "type": "A String", # The type of Cloud Dataflow job.
2183          "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2184              # Cloud Dataflow service.
2185          "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2186              # of the job it replaced.
2187              #
2188              # When sending a `CreateJobRequest`, you can update a job by specifying it
2189              # here. The job named here is stopped, and its intermediate state is
2190              # transferred to this job.
2191          "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job will be executed that
2192              # isn't contained in the submitted job.
2193            "stages": { # A mapping from each stage to the information about that stage.
2194              "a_key": { # Contains information about how a particular
2195                  # google.dataflow.v1beta3.Step will be executed.
2196                "stepName": [ # The steps associated with the execution stage.
2197                    # Note that stages may have several steps, and that a given step
2198                    # might be run by more than one stage.
2199                  "A String",
2200                ],
2201              },
2202            },
2203          },
2204        },
2205    ],
2206  }</pre>
2207</div>
2208
2209<div class="method">
2210    <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
2211  <pre>Retrieves the next page of results.
2212
2213Args:
2214  previous_request: The request for the previous page. (required)
2215  previous_response: The response from the request for the previous page. (required)
2216
2217Returns:
2218  A request object that you can call 'execute()' on to request the next
2219  page. Returns None if there are no more items in the collection.
2220    </pre>
2221</div>
2222
2223<div class="method">
2224    <code class="details" id="update">update(projectId, location, jobId, body, x__xgafv=None)</code>
2225  <pre>Updates the state of an existing Cloud Dataflow job.
2226
2227Args:
2228  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
2229  location: string, The location that contains this job. (required)
2230  jobId: string, The job ID. (required)
2231  body: object, The request body. (required)
2232    The object takes the form of:
2233
2234{ # Defines a job to be run by the Cloud Dataflow service.
2235    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2236        # If this field is set, the service will ensure its uniqueness.
2237        # The request to create a job will fail if the service has knowledge of a
2238        # previously submitted job with the same client's ID and job name.
2239        # The caller may use this field to ensure idempotence of job
2240        # creation across retried attempts to create a job.
2241        # By default, the field is empty and, in that case, the service ignores it.
2242    "requestedState": "A String", # The job's requested state.
2243        #
2244        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2245        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
2246        # also be used to directly set a job's requested state to
2247        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2248        # job if it has not already reached a terminal state.
2249    "name": "A String", # The user-specified Cloud Dataflow job name.
2250        #
2251        # Only one Job with a given name may exist in a project at any
2252        # given time. If a caller attempts to create a Job with the same
2253        # name as an already-existing Job, the attempt returns the
2254        # existing Job.
2255        #
2256        # The name must match the regular expression
2257        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2258    "location": "A String", # The location that contains this job.
2259    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2260        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2261    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2262    "currentState": "A String", # The current state of the job.
2263        #
2264        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2265        # specified.
2266        #
2267        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2268        # terminal state. After a job has reached a terminal state, no
2269        # further state updates may be made.
2270        #
2271        # This field may be mutated by the Cloud Dataflow service;
2272        # callers cannot mutate it.
2273    "labels": { # User-defined labels for this job.
2274        #
2275        # The labels map can contain no more than 64 entries.  Entries of the labels
2276        # map are UTF8 strings that comply with the following restrictions:
2277        #
2278        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
2279        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2280        # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
2281        # size.
2282      "a_key": "A String",
2283    },
2284    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2285        # corresponding name prefixes of the new job.
2286      "a_key": "A String",
2287    },
2288    "id": "A String", # The unique ID of this job.
2289        #
2290        # This field is set by the Cloud Dataflow service when the Job is
2291        # created, and is immutable for the life of the job.
2292    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2293      "version": { # A structure describing which components and their versions of the service
2294          # are required in order to run the job.
2295        "a_key": "", # Properties of the object.
2296      },
2297      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2298          # storage.  The system will append the suffix "/temp-{JOBNAME}" to
2299          # this resource prefix, where {JOBNAME} is the value of the
2300          # job_name field.  The resulting bucket and object prefix is used
2301          # as the prefix of the resources used to store temporary data
2302          # needed during the job execution.  NOTE: This will override the
2303          # value in taskrunner_settings.
2304          # The supported resource type is:
2305          #
2306          # Google Cloud Storage:
2307          #
2308          #   storage.googleapis.com/{bucket}/{object}
2309          #   bucket.storage.googleapis.com/{object}
2310      "internalExperiments": { # Experimental settings.
2311        "a_key": "", # Properties of the object. Contains field @type with type URL.
2312      },
2313      "dataset": "A String", # The dataset for the current project where various workflow
2314          # related tables are stored.
2315          #
2316          # The supported resource type is:
2317          #
2318          # Google BigQuery:
2319          #   bigquery.googleapis.com/{dataset}
2320      "experiments": [ # The list of experiments to enable.
2321        "A String",
2322      ],
2323      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
2324      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2325          # options are passed through the service and are used to recreate the
2326          # SDK pipeline options on the worker in a language agnostic and platform
2327          # independent way.
2328        "a_key": "", # Properties of the object.
2329      },
2330      "userAgent": { # A description of the process that generated the request.
2331        "a_key": "", # Properties of the object.
2332      },
2333      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
2334          # unspecified, the service will attempt to choose a reasonable
2335          # default.  This should be in the form of the API service name,
2336          # e.g. "compute.googleapis.com".
2337      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2338          # specified in order for the job to have workers.
2339        { # Describes one particular pool of Cloud Dataflow workers to be
2340            # instantiated by the Cloud Dataflow service in order to perform the
2341            # computations required by a job.  Note that a workflow job may use
2342            # multiple pools, in order to match the various computational
2343            # requirements of the various stages of the job.
2344          "diskSourceImage": "A String", # Fully qualified source image for disks.
2345          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2346              # using the standard Dataflow task runner.  Users should ignore
2347              # this field.
2348            "workflowFileName": "A String", # The file to store the workflow in.
2349            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
2350                # will not be uploaded.
2351                #
2352                # The supported resource type is:
2353                #
2354                # Google Cloud Storage:
2355                #   storage.googleapis.com/{bucket}/{object}
2356                #   bucket.storage.googleapis.com/{object}
2357            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2358            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2359              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2360              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2361                  # "shuffle/v1beta1".
2362              "workerId": "A String", # The ID of the worker running this pipeline.
2363              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2364                  #
2365                  # When workers access Google Cloud APIs, they logically do so via
2366                  # relative URLs.  If this field is specified, it supplies the base
2367                  # URL to use for resolving these relative URLs.  The normative
2368                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2369                  # Locators".
2370                  #
2371                  # If not specified, the default value is "http://www.googleapis.com/"
2372              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2373                  # "dataflow/v1b3/projects".
2374              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2375                  # storage.
2376                  #
2377                  # The supported resource type is:
2378                  #
2379                  # Google Cloud Storage:
2380                  #
2381                  #   storage.googleapis.com/{bucket}/{object}
2382                  #   bucket.storage.googleapis.com/{object}
2383            },
2384            "vmId": "A String", # The ID string of the VM.
2385            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2386            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2387            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2388                # access the Cloud Dataflow API.
2389              "A String",
2390            ],
2391            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2392                # taskrunner; e.g. "root".
2393            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2394                #
2395                # When workers access Google Cloud APIs, they logically do so via
2396                # relative URLs.  If this field is specified, it supplies the base
2397                # URL to use for resolving these relative URLs.  The normative
2398                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2399                # Locators".
2400                #
2401                # If not specified, the default value is "http://www.googleapis.com/"
2402            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2403                # taskrunner; e.g. "wheel".
2404            "languageHint": "A String", # The suggested backend language.
2405            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2406                # console.
2407            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2408            "logDir": "A String", # The directory on the VM to store logs.
2409            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
2410            "harnessCommand": "A String", # The command to launch the worker harness.
2411            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2412                # temporary storage.
2413                #
2414                # The supported resource type is:
2415                #
2416                # Google Cloud Storage:
2417                #   storage.googleapis.com/{bucket}/{object}
2418                #   bucket.storage.googleapis.com/{object}
2419            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2420          },
2421          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2422              # are supported.
2423          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
2424              # service will attempt to choose a reasonable default.
2425          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
2426              # the service will use the network "default".
2427          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
2428              # will attempt to choose a reasonable default.
2429          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
2430              # attempt to choose a reasonable default.
2431          "dataDisks": [ # Data disks that are used by a VM in this workflow.
2432            { # Describes the data disk used by a workflow job.
2433              "mountPoint": "A String", # Directory in a VM where disk is mounted.
2434              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
2435                  # attempt to choose a reasonable default.
2436              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
2437                  # must be a disk type appropriate to the project and zone in which
2438                  # the workers will run.  If unknown or unspecified, the service
2439                  # will attempt to choose a reasonable default.
2440                  #
2441                  # For example, the standard persistent disk type is a resource name
2442                  # typically ending in "pd-standard".  If SSD persistent disks are
2443                  # available, the resource name typically ends with "pd-ssd".  The
2444                  # actual valid values are defined by the Google Compute Engine API,
2445                  # not by the Cloud Dataflow API; consult the Google Compute Engine
2446                  # documentation for more information about determining the set of
2447                  # available disk types for a particular project and zone.
2448                  #
2449                  # Google Compute Engine Disk types are local to a particular
2450                  # project in a particular zone, and so the resource name will
2451                  # typically look something like this:
2452                  #
2453                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
2454            },
2455          ],
2456          "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
2457              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2458              # `TEARDOWN_NEVER`.
2459              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2460              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2461              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2462              # down.
2463              #
2464              # If the workers are not torn down by the service, they will
2465              # continue to run and use Google Compute Engine VM resources in the
2466              # user's project until they are explicitly terminated by the user.
2467              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2468              # policy except for small, manually supervised test jobs.
2469              #
2470              # If unknown or unspecified, the service will attempt to choose a reasonable
2471              # default.
2472          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2473              # Compute Engine API.
2474          "ipConfiguration": "A String", # Configuration for VM IPs.
2475          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2476              # service will choose a number of threads (according to the number of cores
2477              # on the selected machine type for batch, or 1 by convention for streaming).
2478          "poolArgs": { # Extra arguments for this worker pool.
2479            "a_key": "", # Properties of the object. Contains field @type with type URL.
2480          },
2481          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2482              # execute the job.  If zero or unspecified, the service will
2483              # attempt to choose a reasonable default.
2484          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2485              # harness, residing in Google Container Registry.
2486          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
2487              # the form "regions/REGION/subnetworks/SUBNETWORK".
2488          "packages": [ # Packages to be installed on workers.
2489            { # The packages that must be installed in order for a worker to run the
2490                # steps of the Cloud Dataflow job that will be assigned to its worker
2491                # pool.
2492                #
2493                # This is the mechanism by which the Cloud Dataflow SDK causes code to
2494                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2495                # might use this to install jars containing the user's code and all of the
2496                # various dependencies (libraries, data files, etc.) required in order
2497                # for that code to run.
2498              "location": "A String", # The resource to read the package from. The supported resource type is:
2499                  #
2500                  # Google Cloud Storage:
2501                  #
2502                  #   storage.googleapis.com/{bucket}
2503                  #   bucket.storage.googleapis.com/
2504              "name": "A String", # The name of the package.
2505            },
2506          ],
2507          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
2508            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
2509            "algorithm": "A String", # The algorithm to use for autoscaling.
2510          },
2511          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
2512              # select a default set of packages which are useful to worker
2513              # harnesses written in a particular language.
2514          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
2515              # attempt to choose a reasonable default.
2516          "metadata": { # Metadata to set on the Google Compute Engine VMs.
2517            "a_key": "A String",
2518          },
2519        },
2520      ],
2521    },
2522    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed
2523        # form.  This data is provided by the Dataflow service for ease of visualizing
2524        # the pipeline and interpreting Dataflow provided metrics.
2525        # Preliminary field: The format of this data may change at any time.
2526        # A description of the user pipeline and stages through which it is executed.
2527        # Created by Cloud Dataflow service.  Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2528      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2529        { # Description of the type, names/ids, and input/outputs for a transform.
2530          "kind": "A String", # Type of transform.
2531          "name": "A String", # User provided name for this transform instance.
2532          "inputCollectionName": [ # User names for all collection inputs to this transform.
2533            "A String",
2534          ],
2535          "displayData": [ # Transform-specific display data.
2536            { # Data provided with a pipeline or transform to provide descriptive info.
2537              "shortStrValue": "A String", # A possible additional shorter value to display.
2538                  # For example a java_class_name_value of com.mypackage.MyDoFn
2539                  # will be stored with MyDoFn as the short_str_value and
2540                  # com.mypackage.MyDoFn as the java_class_name value.
2541                  # short_str_value can be displayed and java_class_name_value
2542                  # will be displayed as a tooltip.
2543              "durationValue": "A String", # Contains value if the data is of duration type.
2544              "url": "A String", # An optional full URL.
2545              "floatValue": 3.14, # Contains value if the data is of float type.
2546              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2547                  # language namespace (i.e. python module) which defines the display data.
2548                  # This allows a dax monitoring system to specially handle the data
2549                  # and perform custom rendering.
2550              "javaClassValue": "A String", # Contains value if the data is of java class type.
2551              "label": "A String", # An optional label to display in a dax UI for the element.
2552              "boolValue": True or False, # Contains value if the data is of a boolean type.
2553              "strValue": "A String", # Contains value if the data is of string type.
2554              "key": "A String", # The key identifying the display data.
2555                  # This is intended to be used as a label for the display data
2556                  # when viewed in a dax monitoring system.
2557              "int64Value": "A String", # Contains value if the data is of int64 type.
2558              "timestampValue": "A String", # Contains value if the data is of timestamp type.
2559            },
2560          ],
2561          "outputCollectionName": [ # User names for all collection outputs to this transform.
2562            "A String",
2563          ],
2564          "id": "A String", # SDK generated id of this transform instance.
2565        },
2566      ],
2567      "displayData": [ # Pipeline level display data.
2568        { # Data provided with a pipeline or transform to provide descriptive info.
2569          "shortStrValue": "A String", # A possible additional shorter value to display.
2570              # For example a java_class_name_value of com.mypackage.MyDoFn
2571              # will be stored with MyDoFn as the short_str_value and
2572              # com.mypackage.MyDoFn as the java_class_name value.
2573              # short_str_value can be displayed and java_class_name_value
2574              # will be displayed as a tooltip.
2575          "durationValue": "A String", # Contains value if the data is of duration type.
2576          "url": "A String", # An optional full URL.
2577          "floatValue": 3.14, # Contains value if the data is of float type.
2578          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2579              # language namespace (i.e. python module) which defines the display data.
2580              # This allows a dax monitoring system to specially handle the data
2581              # and perform custom rendering.
2582          "javaClassValue": "A String", # Contains value if the data is of java class type.
2583          "label": "A String", # An optional label to display in a dax UI for the element.
2584          "boolValue": True or False, # Contains value if the data is of a boolean type.
2585          "strValue": "A String", # Contains value if the data is of string type.
2586          "key": "A String", # The key identifying the display data.
2587              # This is intended to be used as a label for the display data
2588              # when viewed in a dax monitoring system.
2589          "int64Value": "A String", # Contains value if the data is of int64 type.
2590          "timestampValue": "A String", # Contains value if the data is of timestamp type.
2591        },
2592      ],
2593      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2594        { # Description of the composing transforms, names/ids, and input/outputs of a
2595            # stage of execution.  Some composing transforms and sources may have been
2596            # generated by the Dataflow service during execution planning.
2597          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2598            { # Description of an interstitial value between transforms in an execution
2599                # stage.
2600              "userName": "A String", # Human-readable name for this source; may be user or system generated.
2601              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2602                  # source is most closely associated.
2603              "name": "A String", # Dataflow service generated name for this source.
2604            },
2605          ],
2606          "kind": "A String", # Type of transform this stage is executing.
2607          "name": "A String", # Dataflow service generated name for this stage.
2608          "outputSource": [ # Output sources for this stage.
2609            { # Description of an input or output of an execution stage.
2610              "userName": "A String", # Human-readable name for this source; may be user or system generated.
2611              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2612                  # source is most closely associated.
2613              "name": "A String", # Dataflow service generated name for this source.
2614              "sizeBytes": "A String", # Size of the source, if measurable.
2615            },
2616          ],
2617          "inputSource": [ # Input sources for this stage.
2618            { # Description of an input or output of an execution stage.
2619              "userName": "A String", # Human-readable name for this source; may be user or system generated.
2620              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2621                  # source is most closely associated.
2622              "name": "A String", # Dataflow service generated name for this source.
2623              "sizeBytes": "A String", # Size of the source, if measurable.
2624            },
2625          ],
2626          "componentTransform": [ # Transforms that comprise this execution stage.
2627            { # Description of a transform executed as part of an execution stage.
2628              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2629              "originalTransform": "A String", # User name for the original user transform with which this transform is
2630                  # most closely associated.
2631              "name": "A String", # Dataflow service generated name for this transform.
2632            },
2633          ],
2634          "id": "A String", # Dataflow service generated id for this stage.
2635        },
2636      ],
2637    },
2638    "steps": [ # The top-level steps that constitute the entire job.
2639      { # Defines a particular step within a Cloud Dataflow job.
2640          #
2641          # A job consists of multiple steps, each of which performs some
2642          # specific operation as part of the overall job.  Data is typically
2643          # passed from one step to another as part of the job.
2644          #
2645          # Here's an example of a sequence of steps which together implement a
2646          # Map-Reduce job:
2647          #
2648          #   * Read a collection of data from some source, parsing the
2649          #     collection's elements.
2650          #
2651          #   * Validate the elements.
2652          #
2653          #   * Apply a user-defined function to map each element to some value
2654          #     and extract an element-specific key value.
2655          #
2656          #   * Group elements with the same key into a single element with
2657          #     that key, transforming a multiply-keyed collection into a
2658          #     uniquely-keyed collection.
2659          #
2660          #   * Write the elements out to some data sink.
2661          #
2662          # Note that the Cloud Dataflow service may be used to run many different
2663          # types of jobs, not just Map-Reduce.
2664        "kind": "A String", # The kind of step in the Cloud Dataflow job.
2665        "properties": { # Named properties associated with the step. Each kind of
2666            # predefined step has its own required set of properties.
2667            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
2668          "a_key": "", # Properties of the object.
2669        },
2670        "name": "A String", # The name that identifies the step. This must be unique for each
2671            # step with respect to all other steps in the Cloud Dataflow job.
2672      },
2673    ],
2674    "currentStateTime": "A String", # The timestamp associated with the current state.
2675    "tempFiles": [ # A set of files the system should be aware of that are used
2676        # for temporary storage. These temporary files will be
2677        # removed on job completion.
2678        # No duplicates are allowed.
2679        # No file patterns are supported.
2680        #
2681        # The supported files are:
2682        #
2683        # Google Cloud Storage:
2684        #
2685        #    storage.googleapis.com/{bucket}/{object}
2686        #    bucket.storage.googleapis.com/{object}
2687      "A String",
2688    ],
2689    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
2690        # callers cannot mutate it.
2691      { # A message describing the state of a particular execution stage.
2692        "executionStageName": "A String", # The name of the execution stage.
2693        "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
2694        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
2695      },
2696    ],
2697    "type": "A String", # The type of Cloud Dataflow job.
2698    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2699        # Cloud Dataflow service.
2700    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2701        # of the job it replaced.
2702        #
2703        # When sending a `CreateJobRequest`, you can update a job by specifying it
2704        # here. The job named here is stopped, and its intermediate state is
2705        # transferred to this job.
2706    "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job will be executed
2707        # that isn't contained in the submitted job.
2708      "stages": { # A mapping from each stage to the information about that stage.
2709        "a_key": { # Contains information about how a particular
2710            # google.dataflow.v1beta3.Step will be executed.
2711          "stepName": [ # The steps associated with the execution stage.
2712              # Note that stages may have several steps, and that a given step
2713              # might be run by more than one stage.
2714            "A String",
2715          ],
2716        },
2717      },
2718    },
2719  }
2720
2721  x__xgafv: string, V1 error format.
2722    Allowed values
2723      1 - v1 error format
2724      2 - v2 error format
2725
2726Returns:
2727  An object of the form:
2728
2729    { # Defines a job to be run by the Cloud Dataflow service.
2730      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2731          # If this field is set, the service will ensure its uniqueness.
2732          # The request to create a job will fail if the service has knowledge of a
2733          # previously submitted job with the same client's ID and job name.
2734          # The caller may use this field to ensure idempotence of job
2735          # creation across retried attempts to create a job.
2736          # By default, the field is empty and, in that case, the service ignores it.
2737      "requestedState": "A String", # The job's requested state.
2738          #
2739          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2740          # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
2741          # also be used to directly set a job's requested state to
2742          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2743          # job if it has not already reached a terminal state.
2744      "name": "A String", # The user-specified Cloud Dataflow job name.
2745          #
2746          # Only one Job with a given name may exist in a project at any
2747          # given time. If a caller attempts to create a Job with the same
2748          # name as an already-existing Job, the attempt returns the
2749          # existing Job.
2750          #
2751          # The name must match the regular expression
2752          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2753      "location": "A String", # The location that contains this job.
2754      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2755          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2756      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2757      "currentState": "A String", # The current state of the job.
2758          #
2759          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2760          # specified.
2761          #
2762          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2763          # terminal state. After a job has reached a terminal state, no
2764          # further state updates may be made.
2765          #
2766          # This field may be mutated by the Cloud Dataflow service;
2767          # callers cannot mutate it.
2768      "labels": { # User-defined labels for this job.
2769          #
2770          # The labels map can contain no more than 64 entries.  Entries of the labels
2771          # map are UTF8 strings that comply with the following restrictions:
2772          #
2773          # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
2774          # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2775          # * Both keys and values are additionally constrained to be &lt;= 128 bytes in
2776          # size.
2777        "a_key": "A String",
2778      },
2779      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2780          # corresponding name prefixes of the new job.
2781        "a_key": "A String",
2782      },
2783      "id": "A String", # The unique ID of this job.
2784          #
2785          # This field is set by the Cloud Dataflow service when the Job is
2786          # created, and is immutable for the life of the job.
2787      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2788        "version": { # A structure describing which components and their versions of the service
2789            # are required in order to run the job.
2790          "a_key": "", # Properties of the object.
2791        },
2792        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2793            # storage.  The system will append the suffix "/temp-{JOBNAME}" to
2794            # this resource prefix, where {JOBNAME} is the value of the
2795            # job_name field.  The resulting bucket and object prefix is used
2796            # as the prefix of the resources used to store temporary data
2797            # needed during the job execution.  NOTE: This will override the
2798            # value in taskrunner_settings.
2799            # The supported resource type is:
2800            #
2801            # Google Cloud Storage:
2802            #
2803            #   storage.googleapis.com/{bucket}/{object}
2804            #   bucket.storage.googleapis.com/{object}
2805        "internalExperiments": { # Experimental settings.
2806          "a_key": "", # Properties of the object. Contains field @type with type URL.
2807        },
2808        "dataset": "A String", # The dataset for the current project where various workflow
2809            # related tables are stored.
2810            #
2811            # The supported resource type is:
2812            #
2813            # Google BigQuery:
2814            #   bigquery.googleapis.com/{dataset}
2815        "experiments": [ # The list of experiments to enable.
2816          "A String",
2817        ],
2818        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
2819        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2820            # options are passed through the service and are used to recreate the
2821            # SDK pipeline options on the worker in a language agnostic and platform
2822            # independent way.
2823          "a_key": "", # Properties of the object.
2824        },
2825        "userAgent": { # A description of the process that generated the request.
2826          "a_key": "", # Properties of the object.
2827        },
2828        "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
2829            # unspecified, the service will attempt to choose a reasonable
2830            # default.  This should be in the form of the API service name,
2831            # e.g. "compute.googleapis.com".
2832        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2833            # specified in order for the job to have workers.
2834          { # Describes one particular pool of Cloud Dataflow workers to be
2835              # instantiated by the Cloud Dataflow service in order to perform the
2836              # computations required by a job.  Note that a workflow job may use
2837              # multiple pools, in order to match the various computational
2838              # requirements of the various stages of the job.
2839            "diskSourceImage": "A String", # Fully qualified source image for disks.
2840            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2841                # using the standard Dataflow task runner.  Users should ignore
2842                # this field.
2843              "workflowFileName": "A String", # The file to store the workflow in.
2844              "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
2845                  # will not be uploaded.
2846                  #
2847                  # The supported resource type is:
2848                  #
2849                  # Google Cloud Storage:
2850                  #   storage.googleapis.com/{bucket}/{object}
2851                  #   bucket.storage.googleapis.com/{object}
2852              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2853              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2854                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2855                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2856                    # "shuffle/v1beta1".
2857                "workerId": "A String", # The ID of the worker running this pipeline.
2858                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2859                    #
2860                    # When workers access Google Cloud APIs, they logically do so via
2861                    # relative URLs.  If this field is specified, it supplies the base
2862                    # URL to use for resolving these relative URLs.  The normative
2863                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2864                    # Locators".
2865                    #
2866                    # If not specified, the default value is "http://www.googleapis.com/"
2867                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2868                    # "dataflow/v1b3/projects".
2869                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2870                    # storage.
2871                    #
2872                    # The supported resource type is:
2873                    #
2874                    # Google Cloud Storage:
2875                    #
2876                    #   storage.googleapis.com/{bucket}/{object}
2877                    #   bucket.storage.googleapis.com/{object}
2878              },
2879              "vmId": "A String", # The ID string of the VM.
2880              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2881              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2882              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2883                  # access the Cloud Dataflow API.
2884                "A String",
2885              ],
2886              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2887                  # taskrunner; e.g. "root".
2888              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2889                  #
2890                  # When workers access Google Cloud APIs, they logically do so via
2891                  # relative URLs.  If this field is specified, it supplies the base
2892                  # URL to use for resolving these relative URLs.  The normative
2893                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2894                  # Locators".
2895                  #
2896                  # If not specified, the default value is "http://www.googleapis.com/"
2897              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2898                  # taskrunner; e.g. "wheel".
2899              "languageHint": "A String", # The suggested backend language.
2900              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2901                  # console.
2902              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2903              "logDir": "A String", # The directory on the VM to store logs.
2904              "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
2905              "harnessCommand": "A String", # The command to launch the worker harness.
2906              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2907                  # temporary storage.
2908                  #
2909                  # The supported resource type is:
2910                  #
2911                  # Google Cloud Storage:
2912                  #   storage.googleapis.com/{bucket}/{object}
2913                  #   bucket.storage.googleapis.com/{object}
2914              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2915            },
2916            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2917                # are supported.
2918            "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
2919                # service will attempt to choose a reasonable default.
2920            "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
2921                # the service will use the network "default".
2922            "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
2923                # will attempt to choose a reasonable default.
2924            "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
2925                # attempt to choose a reasonable default.
2926            "dataDisks": [ # Data disks that are used by a VM in this workflow.
2927              { # Describes the data disk used by a workflow job.
2928                "mountPoint": "A String", # Directory in a VM where disk is mounted.
2929                "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
2930                    # attempt to choose a reasonable default.
2931                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
2932                    # must be a disk type appropriate to the project and zone in which
2933                    # the workers will run.  If unknown or unspecified, the service
2934                    # will attempt to choose a reasonable default.
2935                    #
2936                    # For example, the standard persistent disk type is a resource name
2937                    # typically ending in "pd-standard".  If SSD persistent disks are
2938                    # available, the resource name typically ends with "pd-ssd".  The
2939                    # actual valid values are defined by the Google Compute Engine API,
2940                    # not by the Cloud Dataflow API; consult the Google Compute Engine
2941                    # documentation for more information about determining the set of
2942                    # available disk types for a particular project and zone.
2943                    #
2944                    # Google Compute Engine Disk types are local to a particular
2945                    # project in a particular zone, and so the resource name will
2946                    # typically look something like this:
2947                    #
2948                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
2949              },
2950            ],
2951            "teardownPolicy": "A String", # Sets the policy for determining when to tear down the worker pool.
2952                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2953                # `TEARDOWN_NEVER`.
2954                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2955                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2956                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2957                # down.
2958                #
2959                # If the workers are not torn down by the service, they will
2960                # continue to run and use Google Compute Engine VM resources in the
2961                # user's project until they are explicitly terminated by the user.
2962                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2963                # policy except for small, manually supervised test jobs.
2964                #
2965                # If unknown or unspecified, the service will attempt to choose a reasonable
2966                # default.
2967            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2968                # Compute Engine API.
2969            "ipConfiguration": "A String", # Configuration for VM IPs.
2970            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2971                # service will choose a number of threads (according to the number of cores
2972                # on the selected machine type for batch, or 1 by convention for streaming).
2973            "poolArgs": { # Extra arguments for this worker pool.
2974              "a_key": "", # Properties of the object. Contains field @type with type URL.
2975            },
2976            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2977                # execute the job.  If zero or unspecified, the service will
2978                # attempt to choose a reasonable default.
2979            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2980                # harness, residing in Google Container Registry.
2981            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
2982                # the form "regions/REGION/subnetworks/SUBNETWORK".
2983            "packages": [ # Packages to be installed on workers.
2984              { # The packages that must be installed in order for a worker to run the
2985                  # steps of the Cloud Dataflow job that will be assigned to its worker
2986                  # pool.
2987                  #
2988                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
2989                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2990                  # might use this to install jars containing the user's code and all of the
2991                  # various dependencies (libraries, data files, etc.) required in order
2992                  # for that code to run.
2993                "location": "A String", # The resource to read the package from. The supported resource type is:
2994                    #
2995                    # Google Cloud Storage:
2996                    #
2997                    #   storage.googleapis.com/{bucket}
2998                    #   bucket.storage.googleapis.com/
2999                "name": "A String", # The name of the package.
3000              },
3001            ],
3002            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
3003              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
3004              "algorithm": "A String", # The algorithm to use for autoscaling.
3005            },
3006            "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
3007                # select a default set of packages which are useful to worker
3008                # harnesses written in a particular language.
3009            "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
3010                # attempt to choose a reasonable default.
3011            "metadata": { # Metadata to set on the Google Compute Engine VMs.
3012              "a_key": "A String",
3013            },
3014          },
3015        ],
3016      },
3017      "pipelineDescription": { # A descriptive representation of the submitted pipeline as well as the executed
3018          # form.  This data is provided by the Dataflow service for ease of visualizing
3019          # the pipeline and interpreting Dataflow-provided metrics.
3020          # Preliminary field: The format of this data may change at any time.
3021          # A description of the user pipeline and stages through which it is executed.
3022          # Created by Cloud Dataflow service.  Only retrieved with JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
3023        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
3024          { # Description of the type, names/ids, and input/outputs for a transform.
3025            "kind": "A String", # Type of transform.
3026            "name": "A String", # User provided name for this transform instance.
3027            "inputCollectionName": [ # User names for all collection inputs to this transform.
3028              "A String",
3029            ],
3030            "displayData": [ # Transform-specific display data.
3031              { # Data provided with a pipeline or transform to provide descriptive info.
3032                "shortStrValue": "A String", # A possible additional shorter value to display.
3033                    # For example a java_class_name_value of com.mypackage.MyDoFn
3034                    # will be stored with MyDoFn as the short_str_value and
3035                    # com.mypackage.MyDoFn as the java_class_name_value.
3036                    # short_str_value can be displayed and java_class_name_value
3037                    # will be displayed as a tooltip.
3038                "durationValue": "A String", # Contains value if the data is of duration type.
3039                "url": "A String", # An optional full URL.
3040                "floatValue": 3.14, # Contains value if the data is of float type.
3041                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3042                    # language namespace (e.g. a Python module) which defines the display data.
3043                    # This allows a dax monitoring system to specially handle the data
3044                    # and perform custom rendering.
3045                "javaClassValue": "A String", # Contains value if the data is of java class type.
3046                "label": "A String", # An optional label to display in a dax UI for the element.
3047                "boolValue": True or False, # Contains value if the data is of a boolean type.
3048                "strValue": "A String", # Contains value if the data is of string type.
3049                "key": "A String", # The key identifying the display data.
3050                    # This is intended to be used as a label for the display data
3051                    # when viewed in a dax monitoring system.
3052                "int64Value": "A String", # Contains value if the data is of int64 type.
3053                "timestampValue": "A String", # Contains value if the data is of timestamp type.
3054              },
3055            ],
3056            "outputCollectionName": [ # User names for all collection outputs to this transform.
3057              "A String",
3058            ],
3059            "id": "A String", # SDK generated id of this transform instance.
3060          },
3061        ],
3062        "displayData": [ # Pipeline level display data.
3063          { # Data provided with a pipeline or transform to provide descriptive info.
3064            "shortStrValue": "A String", # A possible additional shorter value to display.
3065                # For example a java_class_name_value of com.mypackage.MyDoFn
3066                # will be stored with MyDoFn as the short_str_value and
3067                # com.mypackage.MyDoFn as the java_class_name_value.
3068                # short_str_value can be displayed and java_class_name_value
3069                # will be displayed as a tooltip.
3070            "durationValue": "A String", # Contains value if the data is of duration type.
3071            "url": "A String", # An optional full URL.
3072            "floatValue": 3.14, # Contains value if the data is of float type.
3073            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3074                # language namespace (e.g. a Python module) which defines the display data.
3075                # This allows a dax monitoring system to specially handle the data
3076                # and perform custom rendering.
3077            "javaClassValue": "A String", # Contains value if the data is of java class type.
3078            "label": "A String", # An optional label to display in a dax UI for the element.
3079            "boolValue": True or False, # Contains value if the data is of a boolean type.
3080            "strValue": "A String", # Contains value if the data is of string type.
3081            "key": "A String", # The key identifying the display data.
3082                # This is intended to be used as a label for the display data
3083                # when viewed in a dax monitoring system.
3084            "int64Value": "A String", # Contains value if the data is of int64 type.
3085            "timestampValue": "A String", # Contains value if the data is of timestamp type.
3086          },
3087        ],
3088        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
3089          { # Description of the composing transforms, names/ids, and input/outputs of a
3090              # stage of execution.  Some composing transforms and sources may have been
3091              # generated by the Dataflow service during execution planning.
3092            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
3093              { # Description of an interstitial value between transforms in an execution
3094                  # stage.
3095                "userName": "A String", # Human-readable name for this source; may be user or system generated.
3096                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3097                    # source is most closely associated.
3098                "name": "A String", # Dataflow service generated name for this source.
3099              },
3100            ],
3101            "kind": "A String", # Type of transform this stage is executing.
3102            "name": "A String", # Dataflow service generated name for this stage.
3103            "outputSource": [ # Output sources for this stage.
3104              { # Description of an input or output of an execution stage.
3105                "userName": "A String", # Human-readable name for this source; may be user or system generated.
3106                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3107                    # source is most closely associated.
3108                "name": "A String", # Dataflow service generated name for this source.
3109                "sizeBytes": "A String", # Size of the source, if measurable.
3110              },
3111            ],
3112            "inputSource": [ # Input sources for this stage.
3113              { # Description of an input or output of an execution stage.
3114                "userName": "A String", # Human-readable name for this source; may be user or system generated.
3115                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3116                    # source is most closely associated.
3117                "name": "A String", # Dataflow service generated name for this source.
3118                "sizeBytes": "A String", # Size of the source, if measurable.
3119              },
3120            ],
3121            "componentTransform": [ # Transforms that comprise this execution stage.
3122              { # Description of a transform executed as part of an execution stage.
3123                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3124                "originalTransform": "A String", # User name for the original user transform with which this transform is
3125                    # most closely associated.
3126                "name": "A String", # Dataflow service generated name for this transform.
3127              },
3128            ],
3129            "id": "A String", # Dataflow service generated id for this stage.
3130          },
3131        ],
3132      },
3133      "steps": [ # The top-level steps that constitute the entire job.
3134        { # Defines a particular step within a Cloud Dataflow job.
3135            #
3136            # A job consists of multiple steps, each of which performs some
3137            # specific operation as part of the overall job.  Data is typically
3138            # passed from one step to another as part of the job.
3139            #
3140            # Here's an example of a sequence of steps which together implement a
3141            # Map-Reduce job:
3142            #
3143            #   * Read a collection of data from some source, parsing the
3144            #     collection's elements.
3145            #
3146            #   * Validate the elements.
3147            #
3148            #   * Apply a user-defined function to map each element to some value
3149            #     and extract an element-specific key value.
3150            #
3151            #   * Group elements with the same key into a single element with
3152            #     that key, transforming a multiply-keyed collection into a
3153            #     uniquely-keyed collection.
3154            #
3155            #   * Write the elements out to some data sink.
3156            #
3157            # Note that the Cloud Dataflow service may be used to run many different
3158            # types of jobs, not just Map-Reduce.
3159          "kind": "A String", # The kind of step in the Cloud Dataflow job.
3160          "properties": { # Named properties associated with the step. Each kind of
3161              # predefined step has its own required set of properties.
3162              # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
3163            "a_key": "", # Properties of the object.
3164          },
3165          "name": "A String", # The name that identifies the step. This must be unique for each
3166              # step with respect to all other steps in the Cloud Dataflow job.
3167        },
3168      ],
3169      "currentStateTime": "A String", # The timestamp associated with the current state.
3170      "tempFiles": [ # A set of files the system should be aware of that are used
3171          # for temporary storage. These temporary files will be
3172          # removed on job completion.
3173          # No duplicates are allowed.
3174          # No file patterns are supported.
3175          #
3176          # The supported files are:
3177          #
3178          # Google Cloud Storage:
3179          #
3180          #    storage.googleapis.com/{bucket}/{object}
3181          #    bucket.storage.googleapis.com/{object}
3182        "A String",
3183      ],
3184      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
3185          # callers cannot mutate it.
3186        { # A message describing the state of a particular execution stage.
3187          "executionStageName": "A String", # The name of the execution stage.
3188          "executionStageState": "A String", # Execution stage states allow the same set of values as JobState.
3189          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
3190        },
3191      ],
3192      "type": "A String", # The type of Cloud Dataflow job.
3193      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
3194          # Cloud Dataflow service.
3195      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
3196          # of the job it replaced.
3197          #
3198          # When sending a `CreateJobRequest`, you can update a job by specifying it
3199          # here. The job named here is stopped, and its intermediate state is
3200          # transferred to this job.
3201      "executionInfo": { # Deprecated. Additional information about how a Cloud Dataflow job will be executed that
3202          # isn't contained in the submitted job.
3203        "stages": { # A mapping from each stage to the information about that stage.
3204          "a_key": { # Contains information about how a particular
3205              # google.dataflow.v1beta3.Step will be executed.
3206            "stepName": [ # The steps associated with the execution stage.
3207                # Note that stages may have several steps, and that a given step
3208                # might be run by more than one stage.
3209              "A String",
3210            ],
3211          },
3212        },
3213      },
3214    }</pre>
3215</div>
3216
3217</body></html>