1<html><body>
2<style>
3
4body, h1, h2, h3, div, span, p, pre, a {
5  margin: 0;
6  padding: 0;
7  border: 0;
8  font-weight: inherit;
9  font-style: inherit;
10  font-size: 100%;
11  font-family: inherit;
12  vertical-align: baseline;
13}
14
15body {
16  font-size: 13px;
17  padding: 1em;
18}
19
20h1 {
21  font-size: 26px;
22  margin-bottom: 1em;
23}
24
25h2 {
26  font-size: 24px;
27  margin-bottom: 1em;
28}
29
30h3 {
31  font-size: 20px;
32  margin-bottom: 1em;
33  margin-top: 1em;
34}
35
36pre, code {
37  line-height: 1.5;
38  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
39}
40
41pre {
42  margin-top: 0.5em;
43}
44
45h1, h2, h3, p {
46  font-family: Arial, sans serif;
47}
48
49h1, h2, h3 {
50  border-bottom: solid #CCC 1px;
51}
52
53.toc_element {
54  margin-top: 0.5em;
55}
56
57.firstline {
58  margin-left: 2 em;
59}
60
61.method  {
62  margin-top: 1em;
63  border: solid 1px #CCC;
64  padding: 1em;
65  background: #EEE;
66}
67
68.details {
69  font-weight: bold;
70  font-size: 14px;
71}
72
73</style>
74
75<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.jobs.html">jobs</a></h1>
76<h2>Instance Methods</h2>
77<p class="toc_element">
78  <code><a href="dataflow_v1b3.projects.jobs.debug.html">debug()</a></code>
79</p>
80<p class="firstline">Returns the debug Resource.</p>
81
82<p class="toc_element">
83  <code><a href="dataflow_v1b3.projects.jobs.messages.html">messages()</a></code>
84</p>
85<p class="firstline">Returns the messages Resource.</p>
86
87<p class="toc_element">
88  <code><a href="dataflow_v1b3.projects.jobs.workItems.html">workItems()</a></code>
89</p>
90<p class="firstline">Returns the workItems Resource.</p>
91
92<p class="toc_element">
93  <code><a href="#create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</a></code></p>
94<p class="firstline">Creates a Cloud Dataflow job.</p>
95<p class="toc_element">
96  <code><a href="#get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</a></code></p>
97<p class="firstline">Gets the state of the specified Cloud Dataflow job.</p>
98<p class="toc_element">
99  <code><a href="#getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</a></code></p>
100<p class="firstline">Request the job status.</p>
101<p class="toc_element">
102  <code><a href="#list">list(projectId, pageSize=None, x__xgafv=None, pageToken=None, location=None, filter=None, view=None)</a></code></p>
103<p class="firstline">List the jobs of a project.</p>
104<p class="toc_element">
105  <code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
106<p class="firstline">Retrieves the next page of results.</p>
107<p class="toc_element">
108  <code><a href="#update">update(projectId, jobId, body, location=None, x__xgafv=None)</a></code></p>
109<p class="firstline">Updates the state of an existing Cloud Dataflow job.</p>
110<h3>Method Details</h3>
111<div class="method">
112    <code class="details" id="create">create(projectId, body, location=None, x__xgafv=None, replaceJobId=None, view=None)</code>
113  <pre>Creates a Cloud Dataflow job.
114
115Args:
116  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
117  body: object, The request body. (required)
118    The object takes the form of:
119
120{ # Defines a job to be run by the Cloud Dataflow service.
121    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
122        # If this field is set, the service will ensure its uniqueness.
123        # The request to create a job will fail if the service has knowledge of a
124        # previously submitted job with the same client's ID and job name.
125        # The caller may use this field to ensure idempotence of job
126        # creation across retried attempts to create a job.
127        # By default, the field is empty and, in that case, the service ignores it.
128    "requestedState": "A String", # The job's requested state.
129        #
130        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
131        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
132        # also be used to directly set a job's requested state to
133        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
134        # job if it has not already reached a terminal state.
135    "name": "A String", # The user-specified Cloud Dataflow job name.
136        #
137        # Only one Job with a given name may exist in a project at any
138        # given time. If a caller attempts to create a Job with the same
139        # name as an already-existing Job, the attempt returns the
140        # existing Job.
141        #
142        # The name must match the regular expression
143        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
144    "location": "A String", # The location that contains this job.
145    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
146        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
147    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
148    "currentState": "A String", # The current state of the job.
149        #
150        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
151        # specified.
152        #
153        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
154        # terminal state. After a job has reached a terminal state, no
155        # further state updates may be made.
156        #
157        # This field may be mutated by the Cloud Dataflow service;
158        # callers cannot mutate it.
159    "labels": { # User-defined labels for this job.
160        #
161        # The labels map can contain no more than 64 entries.  Entries of the labels
162        # map are UTF8 strings that comply with the following restrictions:
163        #
164        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
165        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
166        # * Both keys and values are additionally constrained to be <= 128 bytes in
167        # size.
168      "a_key": "A String",
169    },
170    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
171        # corresponding name prefixes of the new job.
172      "a_key": "A String",
173    },
174    "id": "A String", # The unique ID of this job.
175        #
176        # This field is set by the Cloud Dataflow service when the Job is
177        # created, and is immutable for the life of the job.
178    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
179      "version": { # A structure describing which components and their versions of the service
180          # are required in order to run the job.
181        "a_key": "", # Properties of the object.
182      },
183      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
184          # storage.  The system will append the suffix "/temp-{JOBNAME} to
185          # this resource prefix, where {JOBNAME} is the value of the
186          # job_name field.  The resulting bucket and object prefix is used
187          # as the prefix of the resources used to store temporary data
188          # needed during the job execution.  NOTE: This will override the
189          # value in taskrunner_settings.
190          # The supported resource type is:
191          #
192          # Google Cloud Storage:
193          #
194          #   storage.googleapis.com/{bucket}/{object}
195          #   bucket.storage.googleapis.com/{object}
196      "internalExperiments": { # Experimental settings.
197        "a_key": "", # Properties of the object. Contains field @type with type URL.
198      },
199      "dataset": "A String", # The dataset for the current project where various workflow
200          # related tables are stored.
201          #
202          # The supported resource type is:
203          #
204          # Google BigQuery:
205          #   bigquery.googleapis.com/{dataset}
206      "experiments": [ # The list of experiments to enable.
207        "A String",
208      ],
209      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
210      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
211          # options are passed through the service and are used to recreate the
212          # SDK pipeline options on the worker in a language agnostic and platform
213          # independent way.
214        "a_key": "", # Properties of the object.
215      },
216      "userAgent": { # A description of the process that generated the request.
217        "a_key": "", # Properties of the object.
218      },
219      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
220          # unspecified, the service will attempt to choose a reasonable
221          # default.  This should be in the form of the API service name,
222          # e.g. "compute.googleapis.com".
223      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
224          # specified in order for the job to have workers.
225        { # Describes one particular pool of Cloud Dataflow workers to be
226            # instantiated by the Cloud Dataflow service in order to perform the
227            # computations required by a job.  Note that a workflow job may use
228            # multiple pools, in order to match the various computational
229            # requirements of the various stages of the job.
230          "diskSourceImage": "A String", # Fully qualified source image for disks.
231          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
232              # using the standard Dataflow task runner.  Users should ignore
233              # this field.
234            "workflowFileName": "A String", # The file to store the workflow in.
235            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
236                # will not be uploaded.
237                #
238                # The supported resource type is:
239                #
240                # Google Cloud Storage:
241                #   storage.googleapis.com/{bucket}/{object}
242                #   bucket.storage.googleapis.com/{object}
243            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
244            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
245              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
246              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
247                  # "shuffle/v1beta1".
248              "workerId": "A String", # The ID of the worker running this pipeline.
249              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
250                  #
251                  # When workers access Google Cloud APIs, they logically do so via
252                  # relative URLs.  If this field is specified, it supplies the base
253                  # URL to use for resolving these relative URLs.  The normative
254                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
255                  # Locators".
256                  #
257                  # If not specified, the default value is "http://www.googleapis.com/"
258              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
259                  # "dataflow/v1b3/projects".
260              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
261                  # storage.
262                  #
263                  # The supported resource type is:
264                  #
265                  # Google Cloud Storage:
266                  #
267                  #   storage.googleapis.com/{bucket}/{object}
268                  #   bucket.storage.googleapis.com/{object}
269            },
270            "vmId": "A String", # The ID string of the VM.
271            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
272            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
273            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
274                # access the Cloud Dataflow API.
275              "A String",
276            ],
277            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
278                # taskrunner; e.g. "root".
279            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
280                #
281                # When workers access Google Cloud APIs, they logically do so via
282                # relative URLs.  If this field is specified, it supplies the base
283                # URL to use for resolving these relative URLs.  The normative
284                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
285                # Locators".
286                #
287                # If not specified, the default value is "http://www.googleapis.com/"
288            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
289                # taskrunner; e.g. "wheel".
290            "languageHint": "A String", # The suggested backend language.
291            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
292                # console.
293            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
294            "logDir": "A String", # The directory on the VM to store logs.
295            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
296            "harnessCommand": "A String", # The command to launch the worker harness.
297            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
298                # temporary storage.
299                #
300                # The supported resource type is:
301                #
302                # Google Cloud Storage:
303                #   storage.googleapis.com/{bucket}/{object}
304                #   bucket.storage.googleapis.com/{object}
305            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
306          },
307          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
308              # are supported.
309          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
310              # service will attempt to choose a reasonable default.
311          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
312              # the service will use the network "default".
313          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
314              # will attempt to choose a reasonable default.
315          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
316              # attempt to choose a reasonable default.
317          "dataDisks": [ # Data disks that are used by a VM in this workflow.
318            { # Describes the data disk used by a workflow job.
319              "mountPoint": "A String", # Directory in a VM where disk is mounted.
320              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
321                  # attempt to choose a reasonable default.
322              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
323                  # must be a disk type appropriate to the project and zone in which
324                  # the workers will run.  If unknown or unspecified, the service
325                  # will attempt to choose a reasonable default.
326                  #
327                  # For example, the standard persistent disk type is a resource name
328                  # typically ending in "pd-standard".  If SSD persistent disks are
329                  # available, the resource name typically ends with "pd-ssd".  The
330                  # actual valid values are defined the Google Compute Engine API,
331                  # not by the Cloud Dataflow API; consult the Google Compute Engine
332                  # documentation for more information about determining the set of
333                  # available disk types for a particular project and zone.
334                  #
335                  # Google Compute Engine Disk types are local to a particular
336                  # project in a particular zone, and so the resource name will
337                  # typically look something like this:
338                  #
339                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
340            },
341          ],
342          "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
343              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
344              # `TEARDOWN_NEVER`.
345              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
346              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
347              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
348              # down.
349              #
350              # If the workers are not torn down by the service, they will
351              # continue to run and use Google Compute Engine VM resources in the
352              # user's project until they are explicitly terminated by the user.
353              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
354              # policy except for small, manually supervised test jobs.
355              #
356              # If unknown or unspecified, the service will attempt to choose a reasonable
357              # default.
358          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
359              # Compute Engine API.
360          "ipConfiguration": "A String", # Configuration for VM IPs.
361          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
362              # service will choose a number of threads (according to the number of cores
363              # on the selected machine type for batch, or 1 by convention for streaming).
364          "poolArgs": { # Extra arguments for this worker pool.
365            "a_key": "", # Properties of the object. Contains field @type with type URL.
366          },
367          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
368              # execute the job.  If zero or unspecified, the service will
369              # attempt to choose a reasonable default.
370          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
371              # harness, residing in Google Container Registry.
372          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
373              # the form "regions/REGION/subnetworks/SUBNETWORK".
374          "packages": [ # Packages to be installed on workers.
375            { # The packages that must be installed in order for a worker to run the
376                # steps of the Cloud Dataflow job that will be assigned to its worker
377                # pool.
378                #
379                # This is the mechanism by which the Cloud Dataflow SDK causes code to
380                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
381                # might use this to install jars containing the user's code and all of the
382                # various dependencies (libraries, data files, etc.) required in order
383                # for that code to run.
384              "location": "A String", # The resource to read the package from. The supported resource type is:
385                  #
386                  # Google Cloud Storage:
387                  #
388                  #   storage.googleapis.com/{bucket}
389                  #   bucket.storage.googleapis.com/
390              "name": "A String", # The name of the package.
391            },
392          ],
393          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
394            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
395            "algorithm": "A String", # The algorithm to use for autoscaling.
396          },
397          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
398              # select a default set of packages which are useful to worker
399              # harnesses written in a particular language.
400          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
401              # attempt to choose a reasonable default.
402          "metadata": { # Metadata to set on the Google Compute Engine VMs.
403            "a_key": "A String",
404          },
405        },
406      ],
407    },
408    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
409        # A description of the user pipeline and stages through which it is executed.
410        # Created by Cloud Dataflow service.  Only retrieved with
411        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
412        # form.  This data is provided by the Dataflow service for ease of visualizing
413        # the pipeline and interpretting Dataflow provided metrics.
414      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
415        { # Description of the type, names/ids, and input/outputs for a transform.
416          "kind": "A String", # Type of transform.
417          "name": "A String", # User provided name for this transform instance.
418          "inputCollectionName": [ # User names for all collection inputs to this transform.
419            "A String",
420          ],
421          "displayData": [ # Transform-specific display data.
422            { # Data provided with a pipeline or transform to provide descriptive info.
423              "shortStrValue": "A String", # A possible additional shorter value to display.
424                  # For example a java_class_name_value of com.mypackage.MyDoFn
425                  # will be stored with MyDoFn as the short_str_value and
426                  # com.mypackage.MyDoFn as the java_class_name value.
427                  # short_str_value can be displayed and java_class_name_value
428                  # will be displayed as a tooltip.
429              "durationValue": "A String", # Contains value if the data is of duration type.
430              "url": "A String", # An optional full URL.
431              "floatValue": 3.14, # Contains value if the data is of float type.
432              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
433                  # language namespace (i.e. python module) which defines the display data.
434                  # This allows a dax monitoring system to specially handle the data
435                  # and perform custom rendering.
436              "javaClassValue": "A String", # Contains value if the data is of java class type.
437              "label": "A String", # An optional label to display in a dax UI for the element.
438              "boolValue": True or False, # Contains value if the data is of a boolean type.
439              "strValue": "A String", # Contains value if the data is of string type.
440              "key": "A String", # The key identifying the display data.
441                  # This is intended to be used as a label for the display data
442                  # when viewed in a dax monitoring system.
443              "int64Value": "A String", # Contains value if the data is of int64 type.
444              "timestampValue": "A String", # Contains value if the data is of timestamp type.
445            },
446          ],
447          "outputCollectionName": [ # User  names for all collection outputs to this transform.
448            "A String",
449          ],
450          "id": "A String", # SDK generated id of this transform instance.
451        },
452      ],
453      "displayData": [ # Pipeline level display data.
454        { # Data provided with a pipeline or transform to provide descriptive info.
455          "shortStrValue": "A String", # A possible additional shorter value to display.
456              # For example a java_class_name_value of com.mypackage.MyDoFn
457              # will be stored with MyDoFn as the short_str_value and
458              # com.mypackage.MyDoFn as the java_class_name value.
459              # short_str_value can be displayed and java_class_name_value
460              # will be displayed as a tooltip.
461          "durationValue": "A String", # Contains value if the data is of duration type.
462          "url": "A String", # An optional full URL.
463          "floatValue": 3.14, # Contains value if the data is of float type.
464          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
465              # language namespace (i.e. python module) which defines the display data.
466              # This allows a dax monitoring system to specially handle the data
467              # and perform custom rendering.
468          "javaClassValue": "A String", # Contains value if the data is of java class type.
469          "label": "A String", # An optional label to display in a dax UI for the element.
470          "boolValue": True or False, # Contains value if the data is of a boolean type.
471          "strValue": "A String", # Contains value if the data is of string type.
472          "key": "A String", # The key identifying the display data.
473              # This is intended to be used as a label for the display data
474              # when viewed in a dax monitoring system.
475          "int64Value": "A String", # Contains value if the data is of int64 type.
476          "timestampValue": "A String", # Contains value if the data is of timestamp type.
477        },
478      ],
479      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
480        { # Description of the composing transforms, names/ids, and input/outputs of a
481            # stage of execution.  Some composing transforms and sources may have been
482            # generated by the Dataflow service during execution planning.
483          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
484            { # Description of an interstitial value between transforms in an execution
485                # stage.
486              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
487              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
488                  # source is most closely associated.
489              "name": "A String", # Dataflow service generated name for this source.
490            },
491          ],
492          "kind": "A String", # Type of tranform this stage is executing.
493          "name": "A String", # Dataflow service generated name for this stage.
494          "outputSource": [ # Output sources for this stage.
495            { # Description of an input or output of an execution stage.
496              "userName": "A String", # Human-readable name for this source; may be user or system generated.
497              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
498                  # source is most closely associated.
499              "name": "A String", # Dataflow service generated name for this source.
500              "sizeBytes": "A String", # Size of the source, if measurable.
501            },
502          ],
503          "inputSource": [ # Input sources for this stage.
504            { # Description of an input or output of an execution stage.
505              "userName": "A String", # Human-readable name for this source; may be user or system generated.
506              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
507                  # source is most closely associated.
508              "name": "A String", # Dataflow service generated name for this source.
509              "sizeBytes": "A String", # Size of the source, if measurable.
510            },
511          ],
512          "componentTransform": [ # Transforms that comprise this execution stage.
513            { # Description of a transform executed as part of an execution stage.
514              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
515              "originalTransform": "A String", # User name for the original user transform with which this transform is
516                  # most closely associated.
517              "name": "A String", # Dataflow service generated name for this source.
518            },
519          ],
520          "id": "A String", # Dataflow service generated id for this stage.
521        },
522      ],
523    },
524    "steps": [ # The top-level steps that constitute the entire job.
525      { # Defines a particular step within a Cloud Dataflow job.
526          #
527          # A job consists of multiple steps, each of which performs some
528          # specific operation as part of the overall job.  Data is typically
529          # passed from one step to another as part of the job.
530          #
531          # Here's an example of a sequence of steps which together implement a
532          # Map-Reduce job:
533          #
534          #   * Read a collection of data from some source, parsing the
535          #     collection's elements.
536          #
537          #   * Validate the elements.
538          #
539          #   * Apply a user-defined function to map each element to some value
540          #     and extract an element-specific key value.
541          #
542          #   * Group elements with the same key into a single element with
543          #     that key, transforming a multiply-keyed collection into a
544          #     uniquely-keyed collection.
545          #
546          #   * Write the elements out to some data sink.
547          #
548          # Note that the Cloud Dataflow service may be used to run many different
549          # types of jobs, not just Map-Reduce.
550        "kind": "A String", # The kind of step in the Cloud Dataflow job.
551        "properties": { # Named properties associated with the step. Each kind of
552            # predefined step has its own required set of properties.
553            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
554          "a_key": "", # Properties of the object.
555        },
556        "name": "A String", # The name that identifies the step. This must be unique for each
557            # step with respect to all other steps in the Cloud Dataflow job.
558      },
559    ],
560    "currentStateTime": "A String", # The timestamp associated with the current state.
561    "tempFiles": [ # A set of files the system should be aware of that are used
562        # for temporary storage. These temporary files will be
563        # removed on job completion.
564        # No duplicates are allowed.
565        # No file patterns are supported.
566        #
567        # The supported files are:
568        #
569        # Google Cloud Storage:
570        #
571        #    storage.googleapis.com/{bucket}/{object}
572        #    bucket.storage.googleapis.com/{object}
573      "A String",
574    ],
575    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
576        # callers cannot mutate it.
577      { # A message describing the state of a particular execution stage.
578        "executionStageName": "A String", # The name of the execution stage.
579        "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
580        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
581      },
582    ],
583    "type": "A String", # The type of Cloud Dataflow job.
584    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
585        # Cloud Dataflow service.
586    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
587        # of the job it replaced.
588        #
589        # When sending a `CreateJobRequest`, you can update a job by specifying it
590        # here. The job named here is stopped, and its intermediate state is
591        # transferred to this job.
592    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
593        # isn't contained in the submitted job.
594      "stages": { # A mapping from each stage to the information about that stage.
595        "a_key": { # Contains information about how a particular
596            # google.dataflow.v1beta3.Step will be executed.
597          "stepName": [ # The steps associated with the execution stage.
598              # Note that stages may have several steps, and that a given step
599              # might be run by more than one stage.
600            "A String",
601          ],
602        },
603      },
604    },
605  }
606
607  location: string, The location that contains this job.
608  x__xgafv: string, V1 error format.
609    Allowed values
610      1 - v1 error format
611      2 - v2 error format
612  replaceJobId: string, Deprecated. This field is now in the Job message.
613  view: string, The level of information requested in response.
614
615Returns:
616  An object of the form:
617
618    { # Defines a job to be run by the Cloud Dataflow service.
619      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
620          # If this field is set, the service will ensure its uniqueness.
621          # The request to create a job will fail if the service has knowledge of a
622          # previously submitted job with the same client's ID and job name.
623          # The caller may use this field to ensure idempotence of job
624          # creation across retried attempts to create a job.
625          # By default, the field is empty and, in that case, the service ignores it.
626      "requestedState": "A String", # The job's requested state.
627          #
628          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
629          # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
630          # also be used to directly set a job's requested state to
631          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
632          # job if it has not already reached a terminal state.
633      "name": "A String", # The user-specified Cloud Dataflow job name.
634          #
635          # Only one Job with a given name may exist in a project at any
636          # given time. If a caller attempts to create a Job with the same
637          # name as an already-existing Job, the attempt returns the
638          # existing Job.
639          #
640          # The name must match the regular expression
641          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
642      "location": "A String", # The location that contains this job.
643      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
644          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
645      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
646      "currentState": "A String", # The current state of the job.
647          #
648          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
649          # specified.
650          #
651          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
652          # terminal state. After a job has reached a terminal state, no
653          # further state updates may be made.
654          #
655          # This field may be mutated by the Cloud Dataflow service;
656          # callers cannot mutate it.
657      "labels": { # User-defined labels for this job.
658          #
659          # The labels map can contain no more than 64 entries.  Entries of the labels
660          # map are UTF8 strings that comply with the following restrictions:
661          #
662          # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
663          # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
664          # * Both keys and values are additionally constrained to be <= 128 bytes in
665          # size.
666        "a_key": "A String",
667      },
668      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
669          # corresponding name prefixes of the new job.
670        "a_key": "A String",
671      },
672      "id": "A String", # The unique ID of this job.
673          #
674          # This field is set by the Cloud Dataflow service when the Job is
675          # created, and is immutable for the life of the job.
676      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
677        "version": { # A structure describing which components and their versions of the service
678            # are required in order to run the job.
679          "a_key": "", # Properties of the object.
680        },
681        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
682            # storage.  The system will append the suffix "/temp-{JOBNAME} to
683            # this resource prefix, where {JOBNAME} is the value of the
684            # job_name field.  The resulting bucket and object prefix is used
685            # as the prefix of the resources used to store temporary data
686            # needed during the job execution.  NOTE: This will override the
687            # value in taskrunner_settings.
688            # The supported resource type is:
689            #
690            # Google Cloud Storage:
691            #
692            #   storage.googleapis.com/{bucket}/{object}
693            #   bucket.storage.googleapis.com/{object}
694        "internalExperiments": { # Experimental settings.
695          "a_key": "", # Properties of the object. Contains field @type with type URL.
696        },
697        "dataset": "A String", # The dataset for the current project where various workflow
698            # related tables are stored.
699            #
700            # The supported resource type is:
701            #
702            # Google BigQuery:
703            #   bigquery.googleapis.com/{dataset}
704        "experiments": [ # The list of experiments to enable.
705          "A String",
706        ],
707        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
708        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
709            # options are passed through the service and are used to recreate the
710            # SDK pipeline options on the worker in a language agnostic and platform
711            # independent way.
712          "a_key": "", # Properties of the object.
713        },
714        "userAgent": { # A description of the process that generated the request.
715          "a_key": "", # Properties of the object.
716        },
717        "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
718            # unspecified, the service will attempt to choose a reasonable
719            # default.  This should be in the form of the API service name,
720            # e.g. "compute.googleapis.com".
721        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
722            # specified in order for the job to have workers.
723          { # Describes one particular pool of Cloud Dataflow workers to be
724              # instantiated by the Cloud Dataflow service in order to perform the
725              # computations required by a job.  Note that a workflow job may use
726              # multiple pools, in order to match the various computational
727              # requirements of the various stages of the job.
728            "diskSourceImage": "A String", # Fully qualified source image for disks.
729            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
730                # using the standard Dataflow task runner.  Users should ignore
731                # this field.
732              "workflowFileName": "A String", # The file to store the workflow in.
733              "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
734                  # will not be uploaded.
735                  #
736                  # The supported resource type is:
737                  #
738                  # Google Cloud Storage:
739                  #   storage.googleapis.com/{bucket}/{object}
740                  #   bucket.storage.googleapis.com/{object}
741              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
742              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
743                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
744                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
745                    # "shuffle/v1beta1".
746                "workerId": "A String", # The ID of the worker running this pipeline.
747                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
748                    #
749                    # When workers access Google Cloud APIs, they logically do so via
750                    # relative URLs.  If this field is specified, it supplies the base
751                    # URL to use for resolving these relative URLs.  The normative
752                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
753                    # Locators".
754                    #
755                    # If not specified, the default value is "http://www.googleapis.com/"
756                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
757                    # "dataflow/v1b3/projects".
758                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
759                    # storage.
760                    #
761                    # The supported resource type is:
762                    #
763                    # Google Cloud Storage:
764                    #
765                    #   storage.googleapis.com/{bucket}/{object}
766                    #   bucket.storage.googleapis.com/{object}
767              },
768              "vmId": "A String", # The ID string of the VM.
769              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
770              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
771              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
772                  # access the Cloud Dataflow API.
773                "A String",
774              ],
775              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
776                  # taskrunner; e.g. "root".
777              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
778                  #
779                  # When workers access Google Cloud APIs, they logically do so via
780                  # relative URLs.  If this field is specified, it supplies the base
781                  # URL to use for resolving these relative URLs.  The normative
782                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
783                  # Locators".
784                  #
785                  # If not specified, the default value is "http://www.googleapis.com/"
786              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
787                  # taskrunner; e.g. "wheel".
788              "languageHint": "A String", # The suggested backend language.
789              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
790                  # console.
791              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
792              "logDir": "A String", # The directory on the VM to store logs.
793              "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
794              "harnessCommand": "A String", # The command to launch the worker harness.
795              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
796                  # temporary storage.
797                  #
798                  # The supported resource type is:
799                  #
800                  # Google Cloud Storage:
801                  #   storage.googleapis.com/{bucket}/{object}
802                  #   bucket.storage.googleapis.com/{object}
803              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
804            },
805            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
806                # are supported.
807            "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
808                # service will attempt to choose a reasonable default.
809            "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
810                # the service will use the network "default".
811            "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
812                # will attempt to choose a reasonable default.
813            "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
814                # attempt to choose a reasonable default.
815            "dataDisks": [ # Data disks that are used by a VM in this workflow.
816              { # Describes the data disk used by a workflow job.
817                "mountPoint": "A String", # Directory in a VM where disk is mounted.
818                "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
819                    # attempt to choose a reasonable default.
820                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
821                    # must be a disk type appropriate to the project and zone in which
822                    # the workers will run.  If unknown or unspecified, the service
823                    # will attempt to choose a reasonable default.
824                    #
825                    # For example, the standard persistent disk type is a resource name
826                    # typically ending in "pd-standard".  If SSD persistent disks are
827                    # available, the resource name typically ends with "pd-ssd".  The
828                    # actual valid values are defined the Google Compute Engine API,
829                    # not by the Cloud Dataflow API; consult the Google Compute Engine
830                    # documentation for more information about determining the set of
831                    # available disk types for a particular project and zone.
832                    #
833                    # Google Compute Engine Disk types are local to a particular
834                    # project in a particular zone, and so the resource name will
835                    # typically look something like this:
836                    #
837                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
838              },
839            ],
840            "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
841                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
842                # `TEARDOWN_NEVER`.
843                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
844                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
845                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
846                # down.
847                #
848                # If the workers are not torn down by the service, they will
849                # continue to run and use Google Compute Engine VM resources in the
850                # user's project until they are explicitly terminated by the user.
851                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
852                # policy except for small, manually supervised test jobs.
853                #
854                # If unknown or unspecified, the service will attempt to choose a reasonable
855                # default.
856            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
857                # Compute Engine API.
858            "ipConfiguration": "A String", # Configuration for VM IPs.
859            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
860                # service will choose a number of threads (according to the number of cores
861                # on the selected machine type for batch, or 1 by convention for streaming).
862            "poolArgs": { # Extra arguments for this worker pool.
863              "a_key": "", # Properties of the object. Contains field @type with type URL.
864            },
865            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
866                # execute the job.  If zero or unspecified, the service will
867                # attempt to choose a reasonable default.
868            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
869                # harness, residing in Google Container Registry.
870            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
871                # the form "regions/REGION/subnetworks/SUBNETWORK".
872            "packages": [ # Packages to be installed on workers.
873              { # The packages that must be installed in order for a worker to run the
874                  # steps of the Cloud Dataflow job that will be assigned to its worker
875                  # pool.
876                  #
877                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
878                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
879                  # might use this to install jars containing the user's code and all of the
880                  # various dependencies (libraries, data files, etc.) required in order
881                  # for that code to run.
882                "location": "A String", # The resource to read the package from. The supported resource type is:
883                    #
884                    # Google Cloud Storage:
885                    #
886                    #   storage.googleapis.com/{bucket}
887                    #   bucket.storage.googleapis.com/
888                "name": "A String", # The name of the package.
889              },
890            ],
891            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
892              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
893              "algorithm": "A String", # The algorithm to use for autoscaling.
894            },
895            "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
896                # select a default set of packages which are useful to worker
897                # harnesses written in a particular language.
898            "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
899                # attempt to choose a reasonable default.
900            "metadata": { # Metadata to set on the Google Compute Engine VMs.
901              "a_key": "A String",
902            },
903          },
904        ],
905      },
906      "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
907          # A description of the user pipeline and stages through which it is executed.
908          # Created by Cloud Dataflow service.  Only retrieved with
909          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
910          # form.  This data is provided by the Dataflow service for ease of visualizing
911          # the pipeline and interpretting Dataflow provided metrics.
912        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
913          { # Description of the type, names/ids, and input/outputs for a transform.
914            "kind": "A String", # Type of transform.
915            "name": "A String", # User provided name for this transform instance.
916            "inputCollectionName": [ # User names for all collection inputs to this transform.
917              "A String",
918            ],
919            "displayData": [ # Transform-specific display data.
920              { # Data provided with a pipeline or transform to provide descriptive info.
921                "shortStrValue": "A String", # A possible additional shorter value to display.
922                    # For example a java_class_name_value of com.mypackage.MyDoFn
923                    # will be stored with MyDoFn as the short_str_value and
924                    # com.mypackage.MyDoFn as the java_class_name value.
925                    # short_str_value can be displayed and java_class_name_value
926                    # will be displayed as a tooltip.
927                "durationValue": "A String", # Contains value if the data is of duration type.
928                "url": "A String", # An optional full URL.
929                "floatValue": 3.14, # Contains value if the data is of float type.
930                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
931                    # language namespace (i.e. python module) which defines the display data.
932                    # This allows a dax monitoring system to specially handle the data
933                    # and perform custom rendering.
934                "javaClassValue": "A String", # Contains value if the data is of java class type.
935                "label": "A String", # An optional label to display in a dax UI for the element.
936                "boolValue": True or False, # Contains value if the data is of a boolean type.
937                "strValue": "A String", # Contains value if the data is of string type.
938                "key": "A String", # The key identifying the display data.
939                    # This is intended to be used as a label for the display data
940                    # when viewed in a dax monitoring system.
941                "int64Value": "A String", # Contains value if the data is of int64 type.
942                "timestampValue": "A String", # Contains value if the data is of timestamp type.
943              },
944            ],
945            "outputCollectionName": [ # User  names for all collection outputs to this transform.
946              "A String",
947            ],
948            "id": "A String", # SDK generated id of this transform instance.
949          },
950        ],
951        "displayData": [ # Pipeline level display data.
952          { # Data provided with a pipeline or transform to provide descriptive info.
953            "shortStrValue": "A String", # A possible additional shorter value to display.
954                # For example a java_class_name_value of com.mypackage.MyDoFn
955                # will be stored with MyDoFn as the short_str_value and
956                # com.mypackage.MyDoFn as the java_class_name value.
957                # short_str_value can be displayed and java_class_name_value
958                # will be displayed as a tooltip.
959            "durationValue": "A String", # Contains value if the data is of duration type.
960            "url": "A String", # An optional full URL.
961            "floatValue": 3.14, # Contains value if the data is of float type.
962            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
963                # language namespace (i.e. python module) which defines the display data.
964                # This allows a dax monitoring system to specially handle the data
965                # and perform custom rendering.
966            "javaClassValue": "A String", # Contains value if the data is of java class type.
967            "label": "A String", # An optional label to display in a dax UI for the element.
968            "boolValue": True or False, # Contains value if the data is of a boolean type.
969            "strValue": "A String", # Contains value if the data is of string type.
970            "key": "A String", # The key identifying the display data.
971                # This is intended to be used as a label for the display data
972                # when viewed in a dax monitoring system.
973            "int64Value": "A String", # Contains value if the data is of int64 type.
974            "timestampValue": "A String", # Contains value if the data is of timestamp type.
975          },
976        ],
977        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
978          { # Description of the composing transforms, names/ids, and input/outputs of a
979              # stage of execution.  Some composing transforms and sources may have been
980              # generated by the Dataflow service during execution planning.
981            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
982              { # Description of an interstitial value between transforms in an execution
983                  # stage.
984                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
985                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
986                    # source is most closely associated.
987                "name": "A String", # Dataflow service generated name for this source.
988              },
989            ],
990            "kind": "A String", # Type of tranform this stage is executing.
991            "name": "A String", # Dataflow service generated name for this stage.
992            "outputSource": [ # Output sources for this stage.
993              { # Description of an input or output of an execution stage.
994                "userName": "A String", # Human-readable name for this source; may be user or system generated.
995                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
996                    # source is most closely associated.
997                "name": "A String", # Dataflow service generated name for this source.
998                "sizeBytes": "A String", # Size of the source, if measurable.
999              },
1000            ],
1001            "inputSource": [ # Input sources for this stage.
1002              { # Description of an input or output of an execution stage.
1003                "userName": "A String", # Human-readable name for this source; may be user or system generated.
1004                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1005                    # source is most closely associated.
1006                "name": "A String", # Dataflow service generated name for this source.
1007                "sizeBytes": "A String", # Size of the source, if measurable.
1008              },
1009            ],
1010            "componentTransform": [ # Transforms that comprise this execution stage.
1011              { # Description of a transform executed as part of an execution stage.
1012                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1013                "originalTransform": "A String", # User name for the original user transform with which this transform is
1014                    # most closely associated.
1015                "name": "A String", # Dataflow service generated name for this source.
1016              },
1017            ],
1018            "id": "A String", # Dataflow service generated id for this stage.
1019          },
1020        ],
1021      },
1022      "steps": [ # The top-level steps that constitute the entire job.
1023        { # Defines a particular step within a Cloud Dataflow job.
1024            #
1025            # A job consists of multiple steps, each of which performs some
1026            # specific operation as part of the overall job.  Data is typically
1027            # passed from one step to another as part of the job.
1028            #
1029            # Here's an example of a sequence of steps which together implement a
1030            # Map-Reduce job:
1031            #
1032            #   * Read a collection of data from some source, parsing the
1033            #     collection's elements.
1034            #
1035            #   * Validate the elements.
1036            #
1037            #   * Apply a user-defined function to map each element to some value
1038            #     and extract an element-specific key value.
1039            #
1040            #   * Group elements with the same key into a single element with
1041            #     that key, transforming a multiply-keyed collection into a
1042            #     uniquely-keyed collection.
1043            #
1044            #   * Write the elements out to some data sink.
1045            #
1046            # Note that the Cloud Dataflow service may be used to run many different
1047            # types of jobs, not just Map-Reduce.
1048          "kind": "A String", # The kind of step in the Cloud Dataflow job.
1049          "properties": { # Named properties associated with the step. Each kind of
1050              # predefined step has its own required set of properties.
1051              # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
1052            "a_key": "", # Properties of the object.
1053          },
1054          "name": "A String", # The name that identifies the step. This must be unique for each
1055              # step with respect to all other steps in the Cloud Dataflow job.
1056        },
1057      ],
1058      "currentStateTime": "A String", # The timestamp associated with the current state.
1059      "tempFiles": [ # A set of files the system should be aware of that are used
1060          # for temporary storage. These temporary files will be
1061          # removed on job completion.
1062          # No duplicates are allowed.
1063          # No file patterns are supported.
1064          #
1065          # The supported files are:
1066          #
1067          # Google Cloud Storage:
1068          #
1069          #    storage.googleapis.com/{bucket}/{object}
1070          #    bucket.storage.googleapis.com/{object}
1071        "A String",
1072      ],
1073      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1074          # callers cannot mutate it.
1075        { # A message describing the state of a particular execution stage.
1076          "executionStageName": "A String", # The name of the execution stage.
1077          "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
1078          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1079        },
1080      ],
1081      "type": "A String", # The type of Cloud Dataflow job.
1082      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1083          # Cloud Dataflow service.
1084      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1085          # of the job it replaced.
1086          #
1087          # When sending a `CreateJobRequest`, you can update a job by specifying it
1088          # here. The job named here is stopped, and its intermediate state is
1089          # transferred to this job.
1090      "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1091          # isn't contained in the submitted job.
1092        "stages": { # A mapping from each stage to the information about that stage.
1093          "a_key": { # Contains information about how a particular
1094              # google.dataflow.v1beta3.Step will be executed.
1095            "stepName": [ # The steps associated with the execution stage.
1096                # Note that stages may have several steps, and that a given step
1097                # might be run by more than one stage.
1098              "A String",
1099            ],
1100          },
1101        },
1102      },
1103    }</pre>
1104</div>
1105
1106<div class="method">
1107    <code class="details" id="get">get(projectId, jobId, location=None, x__xgafv=None, view=None)</code>
1108  <pre>Gets the state of the specified Cloud Dataflow job.
1109
1110Args:
1111  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
1112  jobId: string, The job ID. (required)
1113  location: string, The location that contains this job.
1114  x__xgafv: string, V1 error format.
1115    Allowed values
1116      1 - v1 error format
1117      2 - v2 error format
1118  view: string, The level of information requested in response.
1119
1120Returns:
1121  An object of the form:
1122
1123    { # Defines a job to be run by the Cloud Dataflow service.
1124      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1125          # If this field is set, the service will ensure its uniqueness.
1126          # The request to create a job will fail if the service has knowledge of a
1127          # previously submitted job with the same client's ID and job name.
1128          # The caller may use this field to ensure idempotence of job
1129          # creation across retried attempts to create a job.
1130          # By default, the field is empty and, in that case, the service ignores it.
1131      "requestedState": "A String", # The job's requested state.
1132          #
1133          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1134          # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
1135          # also be used to directly set a job's requested state to
1136          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1137          # job if it has not already reached a terminal state.
1138      "name": "A String", # The user-specified Cloud Dataflow job name.
1139          #
1140          # Only one Job with a given name may exist in a project at any
1141          # given time. If a caller attempts to create a Job with the same
1142          # name as an already-existing Job, the attempt returns the
1143          # existing Job.
1144          #
1145          # The name must match the regular expression
1146          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1147      "location": "A String", # The location that contains this job.
1148      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1149          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1150      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1151      "currentState": "A String", # The current state of the job.
1152          #
1153          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1154          # specified.
1155          #
1156          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1157          # terminal state. After a job has reached a terminal state, no
1158          # further state updates may be made.
1159          #
1160          # This field may be mutated by the Cloud Dataflow service;
1161          # callers cannot mutate it.
1162      "labels": { # User-defined labels for this job.
1163          #
1164          # The labels map can contain no more than 64 entries.  Entries of the labels
1165          # map are UTF8 strings that comply with the following restrictions:
1166          #
1167          # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
1168          # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1169          # * Both keys and values are additionally constrained to be <= 128 bytes in
1170          # size.
1171        "a_key": "A String",
1172      },
1173      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1174          # corresponding name prefixes of the new job.
1175        "a_key": "A String",
1176      },
1177      "id": "A String", # The unique ID of this job.
1178          #
1179          # This field is set by the Cloud Dataflow service when the Job is
1180          # created, and is immutable for the life of the job.
1181      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1182        "version": { # A structure describing which components and their versions of the service
1183            # are required in order to run the job.
1184          "a_key": "", # Properties of the object.
1185        },
1186        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1187            # storage.  The system will append the suffix "/temp-{JOBNAME} to
1188            # this resource prefix, where {JOBNAME} is the value of the
1189            # job_name field.  The resulting bucket and object prefix is used
1190            # as the prefix of the resources used to store temporary data
1191            # needed during the job execution.  NOTE: This will override the
1192            # value in taskrunner_settings.
1193            # The supported resource type is:
1194            #
1195            # Google Cloud Storage:
1196            #
1197            #   storage.googleapis.com/{bucket}/{object}
1198            #   bucket.storage.googleapis.com/{object}
1199        "internalExperiments": { # Experimental settings.
1200          "a_key": "", # Properties of the object. Contains field @type with type URL.
1201        },
1202        "dataset": "A String", # The dataset for the current project where various workflow
1203            # related tables are stored.
1204            #
1205            # The supported resource type is:
1206            #
1207            # Google BigQuery:
1208            #   bigquery.googleapis.com/{dataset}
1209        "experiments": [ # The list of experiments to enable.
1210          "A String",
1211        ],
1212        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1213        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1214            # options are passed through the service and are used to recreate the
1215            # SDK pipeline options on the worker in a language agnostic and platform
1216            # independent way.
1217          "a_key": "", # Properties of the object.
1218        },
1219        "userAgent": { # A description of the process that generated the request.
1220          "a_key": "", # Properties of the object.
1221        },
1222        "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
1223            # unspecified, the service will attempt to choose a reasonable
1224            # default.  This should be in the form of the API service name,
1225            # e.g. "compute.googleapis.com".
1226        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1227            # specified in order for the job to have workers.
1228          { # Describes one particular pool of Cloud Dataflow workers to be
1229              # instantiated by the Cloud Dataflow service in order to perform the
1230              # computations required by a job.  Note that a workflow job may use
1231              # multiple pools, in order to match the various computational
1232              # requirements of the various stages of the job.
1233            "diskSourceImage": "A String", # Fully qualified source image for disks.
1234            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1235                # using the standard Dataflow task runner.  Users should ignore
1236                # this field.
1237              "workflowFileName": "A String", # The file to store the workflow in.
1238              "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
1239                  # will not be uploaded.
1240                  #
1241                  # The supported resource type is:
1242                  #
1243                  # Google Cloud Storage:
1244                  #   storage.googleapis.com/{bucket}/{object}
1245                  #   bucket.storage.googleapis.com/{object}
1246              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1247              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1248                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1249                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1250                    # "shuffle/v1beta1".
1251                "workerId": "A String", # The ID of the worker running this pipeline.
1252                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1253                    #
1254                    # When workers access Google Cloud APIs, they logically do so via
1255                    # relative URLs.  If this field is specified, it supplies the base
1256                    # URL to use for resolving these relative URLs.  The normative
1257                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1258                    # Locators".
1259                    #
1260                    # If not specified, the default value is "http://www.googleapis.com/"
1261                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1262                    # "dataflow/v1b3/projects".
1263                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1264                    # storage.
1265                    #
1266                    # The supported resource type is:
1267                    #
1268                    # Google Cloud Storage:
1269                    #
1270                    #   storage.googleapis.com/{bucket}/{object}
1271                    #   bucket.storage.googleapis.com/{object}
1272              },
1273              "vmId": "A String", # The ID string of the VM.
1274              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1275              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1276              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1277                  # access the Cloud Dataflow API.
1278                "A String",
1279              ],
1280              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1281                  # taskrunner; e.g. "root".
1282              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1283                  #
1284                  # When workers access Google Cloud APIs, they logically do so via
1285                  # relative URLs.  If this field is specified, it supplies the base
1286                  # URL to use for resolving these relative URLs.  The normative
1287                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1288                  # Locators".
1289                  #
1290                  # If not specified, the default value is "http://www.googleapis.com/"
1291              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1292                  # taskrunner; e.g. "wheel".
1293              "languageHint": "A String", # The suggested backend language.
1294              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1295                  # console.
1296              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1297              "logDir": "A String", # The directory on the VM to store logs.
1298              "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1299              "harnessCommand": "A String", # The command to launch the worker harness.
1300              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1301                  # temporary storage.
1302                  #
1303                  # The supported resource type is:
1304                  #
1305                  # Google Cloud Storage:
1306                  #   storage.googleapis.com/{bucket}/{object}
1307                  #   bucket.storage.googleapis.com/{object}
1308              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1309            },
1310            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1311                # are supported.
1312            "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
1313                # service will attempt to choose a reasonable default.
1314            "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
1315                # the service will use the network "default".
1316            "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
1317                # will attempt to choose a reasonable default.
1318            "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
1319                # attempt to choose a reasonable default.
1320            "dataDisks": [ # Data disks that are used by a VM in this workflow.
1321              { # Describes the data disk used by a workflow job.
1322                "mountPoint": "A String", # Directory in a VM where disk is mounted.
1323                "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
1324                    # attempt to choose a reasonable default.
1325                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
1326                    # must be a disk type appropriate to the project and zone in which
1327                    # the workers will run.  If unknown or unspecified, the service
1328                    # will attempt to choose a reasonable default.
1329                    #
1330                    # For example, the standard persistent disk type is a resource name
1331                    # typically ending in "pd-standard".  If SSD persistent disks are
1332                    # available, the resource name typically ends with "pd-ssd".  The
1333                    # actual valid values are defined the Google Compute Engine API,
1334                    # not by the Cloud Dataflow API; consult the Google Compute Engine
1335                    # documentation for more information about determining the set of
1336                    # available disk types for a particular project and zone.
1337                    #
1338                    # Google Compute Engine Disk types are local to a particular
1339                    # project in a particular zone, and so the resource name will
1340                    # typically look something like this:
1341                    #
1342                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1343              },
1344            ],
1345            "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
1346                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1347                # `TEARDOWN_NEVER`.
1348                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1349                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1350                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1351                # down.
1352                #
1353                # If the workers are not torn down by the service, they will
1354                # continue to run and use Google Compute Engine VM resources in the
1355                # user's project until they are explicitly terminated by the user.
1356                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1357                # policy except for small, manually supervised test jobs.
1358                #
1359                # If unknown or unspecified, the service will attempt to choose a reasonable
1360                # default.
1361            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1362                # Compute Engine API.
1363            "ipConfiguration": "A String", # Configuration for VM IPs.
1364            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1365                # service will choose a number of threads (according to the number of cores
1366                # on the selected machine type for batch, or 1 by convention for streaming).
1367            "poolArgs": { # Extra arguments for this worker pool.
1368              "a_key": "", # Properties of the object. Contains field @type with type URL.
1369            },
1370            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1371                # execute the job.  If zero or unspecified, the service will
1372                # attempt to choose a reasonable default.
1373            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1374                # harness, residing in Google Container Registry.
1375            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
1376                # the form "regions/REGION/subnetworks/SUBNETWORK".
1377            "packages": [ # Packages to be installed on workers.
1378              { # The packages that must be installed in order for a worker to run the
1379                  # steps of the Cloud Dataflow job that will be assigned to its worker
1380                  # pool.
1381                  #
1382                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
1383                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1384                  # might use this to install jars containing the user's code and all of the
1385                  # various dependencies (libraries, data files, etc.) required in order
1386                  # for that code to run.
1387                "location": "A String", # The resource to read the package from. The supported resource type is:
1388                    #
1389                    # Google Cloud Storage:
1390                    #
1391                    #   storage.googleapis.com/{bucket}
1392                    #   bucket.storage.googleapis.com/
1393                "name": "A String", # The name of the package.
1394              },
1395            ],
1396            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1397              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1398              "algorithm": "A String", # The algorithm to use for autoscaling.
1399            },
1400            "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
1401                # select a default set of packages which are useful to worker
1402                # harnesses written in a particular language.
1403            "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
1404                # attempt to choose a reasonable default.
1405            "metadata": { # Metadata to set on the Google Compute Engine VMs.
1406              "a_key": "A String",
1407            },
1408          },
1409        ],
1410      },
1411      "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
1412          # A description of the user pipeline and stages through which it is executed.
1413          # Created by Cloud Dataflow service.  Only retrieved with
1414          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
1415          # form.  This data is provided by the Dataflow service for ease of visualizing
1416          # the pipeline and interpretting Dataflow provided metrics.
1417        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
1418          { # Description of the type, names/ids, and input/outputs for a transform.
1419            "kind": "A String", # Type of transform.
1420            "name": "A String", # User provided name for this transform instance.
1421            "inputCollectionName": [ # User names for all collection inputs to this transform.
1422              "A String",
1423            ],
1424            "displayData": [ # Transform-specific display data.
1425              { # Data provided with a pipeline or transform to provide descriptive info.
1426                "shortStrValue": "A String", # A possible additional shorter value to display.
1427                    # For example a java_class_name_value of com.mypackage.MyDoFn
1428                    # will be stored with MyDoFn as the short_str_value and
1429                    # com.mypackage.MyDoFn as the java_class_name value.
1430                    # short_str_value can be displayed and java_class_name_value
1431                    # will be displayed as a tooltip.
1432                "durationValue": "A String", # Contains value if the data is of duration type.
1433                "url": "A String", # An optional full URL.
1434                "floatValue": 3.14, # Contains value if the data is of float type.
1435                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1436                    # language namespace (i.e. python module) which defines the display data.
1437                    # This allows a dax monitoring system to specially handle the data
1438                    # and perform custom rendering.
1439                "javaClassValue": "A String", # Contains value if the data is of java class type.
1440                "label": "A String", # An optional label to display in a dax UI for the element.
1441                "boolValue": True or False, # Contains value if the data is of a boolean type.
1442                "strValue": "A String", # Contains value if the data is of string type.
1443                "key": "A String", # The key identifying the display data.
1444                    # This is intended to be used as a label for the display data
1445                    # when viewed in a dax monitoring system.
1446                "int64Value": "A String", # Contains value if the data is of int64 type.
1447                "timestampValue": "A String", # Contains value if the data is of timestamp type.
1448              },
1449            ],
1450            "outputCollectionName": [ # User  names for all collection outputs to this transform.
1451              "A String",
1452            ],
1453            "id": "A String", # SDK generated id of this transform instance.
1454          },
1455        ],
1456        "displayData": [ # Pipeline level display data.
1457          { # Data provided with a pipeline or transform to provide descriptive info.
1458            "shortStrValue": "A String", # A possible additional shorter value to display.
1459                # For example a java_class_name_value of com.mypackage.MyDoFn
1460                # will be stored with MyDoFn as the short_str_value and
1461                # com.mypackage.MyDoFn as the java_class_name value.
1462                # short_str_value can be displayed and java_class_name_value
1463                # will be displayed as a tooltip.
1464            "durationValue": "A String", # Contains value if the data is of duration type.
1465            "url": "A String", # An optional full URL.
1466            "floatValue": 3.14, # Contains value if the data is of float type.
1467            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
1468                # language namespace (i.e. python module) which defines the display data.
1469                # This allows a dax monitoring system to specially handle the data
1470                # and perform custom rendering.
1471            "javaClassValue": "A String", # Contains value if the data is of java class type.
1472            "label": "A String", # An optional label to display in a dax UI for the element.
1473            "boolValue": True or False, # Contains value if the data is of a boolean type.
1474            "strValue": "A String", # Contains value if the data is of string type.
1475            "key": "A String", # The key identifying the display data.
1476                # This is intended to be used as a label for the display data
1477                # when viewed in a dax monitoring system.
1478            "int64Value": "A String", # Contains value if the data is of int64 type.
1479            "timestampValue": "A String", # Contains value if the data is of timestamp type.
1480          },
1481        ],
1482        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
1483          { # Description of the composing transforms, names/ids, and input/outputs of a
1484              # stage of execution.  Some composing transforms and sources may have been
1485              # generated by the Dataflow service during execution planning.
1486            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
1487              { # Description of an interstitial value between transforms in an execution
1488                  # stage.
1489                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1490                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1491                    # source is most closely associated.
1492                "name": "A String", # Dataflow service generated name for this source.
1493              },
1494            ],
1495            "kind": "A String", # Type of tranform this stage is executing.
1496            "name": "A String", # Dataflow service generated name for this stage.
1497            "outputSource": [ # Output sources for this stage.
1498              { # Description of an input or output of an execution stage.
1499                "userName": "A String", # Human-readable name for this source; may be user or system generated.
1500                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1501                    # source is most closely associated.
1502                "name": "A String", # Dataflow service generated name for this source.
1503                "sizeBytes": "A String", # Size of the source, if measurable.
1504              },
1505            ],
1506            "inputSource": [ # Input sources for this stage.
1507              { # Description of an input or output of an execution stage.
1508                "userName": "A String", # Human-readable name for this source; may be user or system generated.
1509                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
1510                    # source is most closely associated.
1511                "name": "A String", # Dataflow service generated name for this source.
1512                "sizeBytes": "A String", # Size of the source, if measurable.
1513              },
1514            ],
1515            "componentTransform": [ # Transforms that comprise this execution stage.
1516              { # Description of a transform executed as part of an execution stage.
1517                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
1518                "originalTransform": "A String", # User name for the original user transform with which this transform is
1519                    # most closely associated.
1520                "name": "A String", # Dataflow service generated name for this source.
1521              },
1522            ],
1523            "id": "A String", # Dataflow service generated id for this stage.
1524          },
1525        ],
1526      },
1527      "steps": [ # The top-level steps that constitute the entire job.
1528        { # Defines a particular step within a Cloud Dataflow job.
1529            #
1530            # A job consists of multiple steps, each of which performs some
1531            # specific operation as part of the overall job.  Data is typically
1532            # passed from one step to another as part of the job.
1533            #
1534            # Here's an example of a sequence of steps which together implement a
1535            # Map-Reduce job:
1536            #
1537            #   * Read a collection of data from some source, parsing the
1538            #     collection's elements.
1539            #
1540            #   * Validate the elements.
1541            #
1542            #   * Apply a user-defined function to map each element to some value
1543            #     and extract an element-specific key value.
1544            #
1545            #   * Group elements with the same key into a single element with
1546            #     that key, transforming a multiply-keyed collection into a
1547            #     uniquely-keyed collection.
1548            #
1549            #   * Write the elements out to some data sink.
1550            #
1551            # Note that the Cloud Dataflow service may be used to run many different
1552            # types of jobs, not just Map-Reduce.
1553          "kind": "A String", # The kind of step in the Cloud Dataflow job.
1554          "properties": { # Named properties associated with the step. Each kind of
1555              # predefined step has its own required set of properties.
1556              # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
1557            "a_key": "", # Properties of the object.
1558          },
1559          "name": "A String", # The name that identifies the step. This must be unique for each
1560              # step with respect to all other steps in the Cloud Dataflow job.
1561        },
1562      ],
1563      "currentStateTime": "A String", # The timestamp associated with the current state.
1564      "tempFiles": [ # A set of files the system should be aware of that are used
1565          # for temporary storage. These temporary files will be
1566          # removed on job completion.
1567          # No duplicates are allowed.
1568          # No file patterns are supported.
1569          #
1570          # The supported files are:
1571          #
1572          # Google Cloud Storage:
1573          #
1574          #    storage.googleapis.com/{bucket}/{object}
1575          #    bucket.storage.googleapis.com/{object}
1576        "A String",
1577      ],
1578      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
1579          # callers cannot mutate it.
1580        { # A message describing the state of a particular execution stage.
1581          "executionStageName": "A String", # The name of the execution stage.
1582          "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
1583          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
1584        },
1585      ],
1586      "type": "A String", # The type of Cloud Dataflow job.
1587      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
1588          # Cloud Dataflow service.
1589      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
1590          # of the job it replaced.
1591          #
1592          # When sending a `CreateJobRequest`, you can update a job by specifying it
1593          # here. The job named here is stopped, and its intermediate state is
1594          # transferred to this job.
1595      "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
1596          # isn't contained in the submitted job.
1597        "stages": { # A mapping from each stage to the information about that stage.
1598          "a_key": { # Contains information about how a particular
1599              # google.dataflow.v1beta3.Step will be executed.
1600            "stepName": [ # The steps associated with the execution stage.
1601                # Note that stages may have several steps, and that a given step
1602                # might be run by more than one stage.
1603              "A String",
1604            ],
1605          },
1606        },
1607      },
1608    }</pre>
1609</div>
1610
1611<div class="method">
1612    <code class="details" id="getMetrics">getMetrics(projectId, jobId, startTime=None, location=None, x__xgafv=None)</code>
1613  <pre>Request the job status.
1614
1615Args:
1616  projectId: string, A project id. (required)
1617  jobId: string, The job to get messages for. (required)
1618  startTime: string, Return only metric data that has changed since this time.
1619Default is to return all information about all metrics for the job.
1620  location: string, The location which contains the job specified by job_id.
1621  x__xgafv: string, V1 error format.
1622    Allowed values
1623      1 - v1 error format
1624      2 - v2 error format
1625
1626Returns:
1627  An object of the form:
1628
1629    { # JobMetrics contains a collection of metrics descibing the detailed progress
1630      # of a Dataflow job. Metrics correspond to user-defined and system-defined
1631      # metrics in the job.
1632      #
1633      # This resource captures only the most recent values of each metric;
1634      # time-series data can be queried for them (under the same metric names)
1635      # from Cloud Monitoring.
1636    "metrics": [ # All metrics for this job.
1637      { # Describes the state of a metric.
1638        "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1639            # This holds the count of the aggregated values and is used in combination
1640            # with mean_sum above to obtain the actual mean aggregate value.
1641            # The only possible value type is Long.
1642        "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
1643            # reporting work progress; it will be filled in responses from the
1644            # metrics API.
1645        "set": "", # Worker-computed aggregate value for the "Set" aggregation kind.  The only
1646            # possible value type is a list of Values whose type can be Long, Double,
1647            # or String, according to the metric's type.  All Values in the list must
1648            # be of the same type.
1649        "name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
1650            # metric.
1651          "origin": "A String", # Origin (namespace) of metric name. May be blank for user-define metrics;
1652              # will be "dataflow" for metrics defined by the Dataflow service or SDK.
1653          "name": "A String", # Worker-defined metric name.
1654          "context": { # Zero or more labeled fields which identify the part of the job this
1655              # metric is associated with, such as the name of a step or collection.
1656              #
1657              # For example, built-in counters associated with steps will have
1658              # context['step'] = <step-name>. Counters associated with PCollections
1659              # in the SDK will have context['pcollection'] = <pcollection-name>.
1660            "a_key": "A String",
1661          },
1662        },
1663        "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
1664            # value accumulated since the worker started working on this WorkItem.
1665            # By default this is false, indicating that this metric is reported
1666            # as a delta that is not associated with any WorkItem.
1667        "kind": "A String", # Metric aggregation kind.  The possible metric aggregation kinds are
1668            # "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution".
1669            # The specified aggregation kind is case-insensitive.
1670            #
1671            # If omitted, this is not an aggregated value but instead
1672            # a single metric sample value.
1673        "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
1674            # "And", and "Or".  The possible value types are Long, Double, and Boolean.
1675        "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
1676            # This holds the sum of the aggregated values and is used in combination
1677            # with mean_count below to obtain the actual mean aggregate value.
1678            # The only possible value types are Long and Double.
1679        "distribution": "", # A struct value describing properties of a distribution of numeric values.
1680        "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
1681            # service.
1682      },
1683    ],
1684    "metricTime": "A String", # Timestamp as of which metric values are current.
1685  }</pre>
1686</div>
1687
1688<div class="method">
1689    <code class="details" id="list">list(projectId, pageSize=None, x__xgafv=None, pageToken=None, location=None, filter=None, view=None)</code>
1690  <pre>List the jobs of a project.
1691
1692Args:
1693  projectId: string, The project which owns the jobs. (required)
1694  pageSize: integer, If there are many jobs, limit response to at most this many.
1695The actual number of jobs returned will be the lesser of max_responses
1696and an unspecified server-defined limit.
1697  x__xgafv: string, V1 error format.
1698    Allowed values
1699      1 - v1 error format
1700      2 - v2 error format
1701  pageToken: string, Set this to the 'next_page_token' field of a previous response
1702to request additional results in a long list.
1703  location: string, The location that contains this job.
1704  filter: string, The kind of filter to use.
1705  view: string, Level of information requested in response. Default is `JOB_VIEW_SUMMARY`.
1706
1707Returns:
1708  An object of the form:
1709
1710    { # Response to a request to list Cloud Dataflow jobs.  This may be a partial
1711      # response, depending on the page size in the ListJobsRequest.
1712    "nextPageToken": "A String", # Set if there may be more results than fit in this response.
1713    "failedLocation": [ # Zero or more messages describing locations that failed to respond.
1714      { # Indicates which location failed to respond to a request for data.
1715        "name": "A String", # The name of the failed location.
1716      },
1717    ],
1718    "jobs": [ # A subset of the requested job information.
1719      { # Defines a job to be run by the Cloud Dataflow service.
1720          "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
1721              # If this field is set, the service will ensure its uniqueness.
1722              # The request to create a job will fail if the service has knowledge of a
1723              # previously submitted job with the same client's ID and job name.
1724              # The caller may use this field to ensure idempotence of job
1725              # creation across retried attempts to create a job.
1726              # By default, the field is empty and, in that case, the service ignores it.
1727          "requestedState": "A String", # The job's requested state.
1728              #
1729              # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
1730              # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
1731              # also be used to directly set a job's requested state to
1732              # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
1733              # job if it has not already reached a terminal state.
1734          "name": "A String", # The user-specified Cloud Dataflow job name.
1735              #
1736              # Only one Job with a given name may exist in a project at any
1737              # given time. If a caller attempts to create a Job with the same
1738              # name as an already-existing Job, the attempt returns the
1739              # existing Job.
1740              #
1741              # The name must match the regular expression
1742              # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
1743          "location": "A String", # The location that contains this job.
1744          "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
1745              # `JOB_STATE_UPDATED`), this field contains the ID of that job.
1746          "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
1747          "currentState": "A String", # The current state of the job.
1748              #
1749              # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
1750              # specified.
1751              #
1752              # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
1753              # terminal state. After a job has reached a terminal state, no
1754              # further state updates may be made.
1755              #
1756              # This field may be mutated by the Cloud Dataflow service;
1757              # callers cannot mutate it.
1758          "labels": { # User-defined labels for this job.
1759              #
1760              # The labels map can contain no more than 64 entries.  Entries of the labels
1761              # map are UTF8 strings that comply with the following restrictions:
1762              #
1763              # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
1764              # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
1765              # * Both keys and values are additionally constrained to be <= 128 bytes in
1766              # size.
1767            "a_key": "A String",
1768          },
1769          "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
1770              # corresponding name prefixes of the new job.
1771            "a_key": "A String",
1772          },
1773          "id": "A String", # The unique ID of this job.
1774              #
1775              # This field is set by the Cloud Dataflow service when the Job is
1776              # created, and is immutable for the life of the job.
1777          "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
1778            "version": { # A structure describing which components and their versions of the service
1779                # are required in order to run the job.
1780              "a_key": "", # Properties of the object.
1781            },
1782            "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1783                # storage.  The system will append the suffix "/temp-{JOBNAME} to
1784                # this resource prefix, where {JOBNAME} is the value of the
1785                # job_name field.  The resulting bucket and object prefix is used
1786                # as the prefix of the resources used to store temporary data
1787                # needed during the job execution.  NOTE: This will override the
1788                # value in taskrunner_settings.
1789                # The supported resource type is:
1790                #
1791                # Google Cloud Storage:
1792                #
1793                #   storage.googleapis.com/{bucket}/{object}
1794                #   bucket.storage.googleapis.com/{object}
1795            "internalExperiments": { # Experimental settings.
1796              "a_key": "", # Properties of the object. Contains field @type with type URL.
1797            },
1798            "dataset": "A String", # The dataset for the current project where various workflow
1799                # related tables are stored.
1800                #
1801                # The supported resource type is:
1802                #
1803                # Google BigQuery:
1804                #   bigquery.googleapis.com/{dataset}
1805            "experiments": [ # The list of experiments to enable.
1806              "A String",
1807            ],
1808            "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
1809            "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
1810                # options are passed through the service and are used to recreate the
1811                # SDK pipeline options on the worker in a language agnostic and platform
1812                # independent way.
1813              "a_key": "", # Properties of the object.
1814            },
1815            "userAgent": { # A description of the process that generated the request.
1816              "a_key": "", # Properties of the object.
1817            },
1818            "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
1819                # unspecified, the service will attempt to choose a reasonable
1820                # default.  This should be in the form of the API service name,
1821                # e.g. "compute.googleapis.com".
1822            "workerPools": [ # The worker pools. At least one "harness" worker pool must be
1823                # specified in order for the job to have workers.
1824              { # Describes one particular pool of Cloud Dataflow workers to be
1825                  # instantiated by the Cloud Dataflow service in order to perform the
1826                  # computations required by a job.  Note that a workflow job may use
1827                  # multiple pools, in order to match the various computational
1828                  # requirements of the various stages of the job.
1829                "diskSourceImage": "A String", # Fully qualified source image for disks.
1830                "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
1831                    # using the standard Dataflow task runner.  Users should ignore
1832                    # this field.
1833                  "workflowFileName": "A String", # The file to store the workflow in.
1834                  "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
1835                      # will not be uploaded.
1836                      #
1837                      # The supported resource type is:
1838                      #
1839                      # Google Cloud Storage:
1840                      #   storage.googleapis.com/{bucket}/{object}
1841                      #   bucket.storage.googleapis.com/{object}
1842                  "commandlinesFileName": "A String", # The file to store preprocessing commands in.
1843                  "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
1844                    "reportingEnabled": True or False, # Whether to send work progress updates to the service.
1845                    "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
1846                        # "shuffle/v1beta1".
1847                    "workerId": "A String", # The ID of the worker running this pipeline.
1848                    "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
1849                        #
1850                        # When workers access Google Cloud APIs, they logically do so via
1851                        # relative URLs.  If this field is specified, it supplies the base
1852                        # URL to use for resolving these relative URLs.  The normative
1853                        # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1854                        # Locators".
1855                        #
1856                        # If not specified, the default value is "http://www.googleapis.com/"
1857                    "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
1858                        # "dataflow/v1b3/projects".
1859                    "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
1860                        # storage.
1861                        #
1862                        # The supported resource type is:
1863                        #
1864                        # Google Cloud Storage:
1865                        #
1866                        #   storage.googleapis.com/{bucket}/{object}
1867                        #   bucket.storage.googleapis.com/{object}
1868                  },
1869                  "vmId": "A String", # The ID string of the VM.
1870                  "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
1871                  "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
1872                  "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
1873                      # access the Cloud Dataflow API.
1874                    "A String",
1875                  ],
1876                  "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
1877                      # taskrunner; e.g. "root".
1878                  "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
1879                      #
1880                      # When workers access Google Cloud APIs, they logically do so via
1881                      # relative URLs.  If this field is specified, it supplies the base
1882                      # URL to use for resolving these relative URLs.  The normative
1883                      # algorithm used is defined by RFC 1808, "Relative Uniform Resource
1884                      # Locators".
1885                      #
1886                      # If not specified, the default value is "http://www.googleapis.com/"
1887                  "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
1888                      # taskrunner; e.g. "wheel".
1889                  "languageHint": "A String", # The suggested backend language.
1890                  "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
1891                      # console.
1892                  "streamingWorkerMainClass": "A String", # The streaming worker main class name.
1893                  "logDir": "A String", # The directory on the VM to store logs.
1894                  "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
1895                  "harnessCommand": "A String", # The command to launch the worker harness.
1896                  "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
1897                      # temporary storage.
1898                      #
1899                      # The supported resource type is:
1900                      #
1901                      # Google Cloud Storage:
1902                      #   storage.googleapis.com/{bucket}/{object}
1903                      #   bucket.storage.googleapis.com/{object}
1904                  "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
1905                },
1906                "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
1907                    # are supported.
1908                "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
1909                    # service will attempt to choose a reasonable default.
1910                "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
1911                    # the service will use the network "default".
1912                "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
1913                    # will attempt to choose a reasonable default.
1914                "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
1915                    # attempt to choose a reasonable default.
1916                "dataDisks": [ # Data disks that are used by a VM in this workflow.
1917                  { # Describes the data disk used by a workflow job.
1918                    "mountPoint": "A String", # Directory in a VM where disk is mounted.
1919                    "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
1920                        # attempt to choose a reasonable default.
1921                    "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
1922                        # must be a disk type appropriate to the project and zone in which
1923                        # the workers will run.  If unknown or unspecified, the service
1924                        # will attempt to choose a reasonable default.
1925                        #
1926                        # For example, the standard persistent disk type is a resource name
1927                        # typically ending in "pd-standard".  If SSD persistent disks are
1928                        # available, the resource name typically ends with "pd-ssd".  The
1929                        # actual valid values are defined the Google Compute Engine API,
1930                        # not by the Cloud Dataflow API; consult the Google Compute Engine
1931                        # documentation for more information about determining the set of
1932                        # available disk types for a particular project and zone.
1933                        #
1934                        # Google Compute Engine Disk types are local to a particular
1935                        # project in a particular zone, and so the resource name will
1936                        # typically look something like this:
1937                        #
1938                        # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
1939                  },
1940                ],
1941                "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
1942                    # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
1943                    # `TEARDOWN_NEVER`.
1944                    # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
1945                    # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
1946                    # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
1947                    # down.
1948                    #
1949                    # If the workers are not torn down by the service, they will
1950                    # continue to run and use Google Compute Engine VM resources in the
1951                    # user's project until they are explicitly terminated by the user.
1952                    # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
1953                    # policy except for small, manually supervised test jobs.
1954                    #
1955                    # If unknown or unspecified, the service will attempt to choose a reasonable
1956                    # default.
1957                "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
1958                    # Compute Engine API.
1959                "ipConfiguration": "A String", # Configuration for VM IPs.
1960                "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
1961                    # service will choose a number of threads (according to the number of cores
1962                    # on the selected machine type for batch, or 1 by convention for streaming).
1963                "poolArgs": { # Extra arguments for this worker pool.
1964                  "a_key": "", # Properties of the object. Contains field @type with type URL.
1965                },
1966                "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
1967                    # execute the job.  If zero or unspecified, the service will
1968                    # attempt to choose a reasonable default.
1969                "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
1970                    # harness, residing in Google Container Registry.
1971                "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
1972                    # the form "regions/REGION/subnetworks/SUBNETWORK".
1973                "packages": [ # Packages to be installed on workers.
1974                  { # The packages that must be installed in order for a worker to run the
1975                      # steps of the Cloud Dataflow job that will be assigned to its worker
1976                      # pool.
1977                      #
1978                      # This is the mechanism by which the Cloud Dataflow SDK causes code to
1979                      # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
1980                      # might use this to install jars containing the user's code and all of the
1981                      # various dependencies (libraries, data files, etc.) required in order
1982                      # for that code to run.
1983                    "location": "A String", # The resource to read the package from. The supported resource type is:
1984                        #
1985                        # Google Cloud Storage:
1986                        #
1987                        #   storage.googleapis.com/{bucket}
1988                        #   bucket.storage.googleapis.com/
1989                    "name": "A String", # The name of the package.
1990                  },
1991                ],
1992                "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
1993                  "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
1994                  "algorithm": "A String", # The algorithm to use for autoscaling.
1995                },
1996                "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
1997                    # select a default set of packages which are useful to worker
1998                    # harnesses written in a particular language.
1999                "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
2000                    # attempt to choose a reasonable default.
2001                "metadata": { # Metadata to set on the Google Compute Engine VMs.
2002                  "a_key": "A String",
2003                },
2004              },
2005            ],
2006          },
2007          "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
2008              # A description of the user pipeline and stages through which it is executed.
2009              # Created by Cloud Dataflow service.  Only retrieved with
2010              # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2011              # form.  This data is provided by the Dataflow service for ease of visualizing
2012              # the pipeline and interpretting Dataflow provided metrics.
2013            "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2014              { # Description of the type, names/ids, and input/outputs for a transform.
2015                "kind": "A String", # Type of transform.
2016                "name": "A String", # User provided name for this transform instance.
2017                "inputCollectionName": [ # User names for all collection inputs to this transform.
2018                  "A String",
2019                ],
2020                "displayData": [ # Transform-specific display data.
2021                  { # Data provided with a pipeline or transform to provide descriptive info.
2022                    "shortStrValue": "A String", # A possible additional shorter value to display.
2023                        # For example a java_class_name_value of com.mypackage.MyDoFn
2024                        # will be stored with MyDoFn as the short_str_value and
2025                        # com.mypackage.MyDoFn as the java_class_name value.
2026                        # short_str_value can be displayed and java_class_name_value
2027                        # will be displayed as a tooltip.
2028                    "durationValue": "A String", # Contains value if the data is of duration type.
2029                    "url": "A String", # An optional full URL.
2030                    "floatValue": 3.14, # Contains value if the data is of float type.
2031                    "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2032                        # language namespace (i.e. python module) which defines the display data.
2033                        # This allows a dax monitoring system to specially handle the data
2034                        # and perform custom rendering.
2035                    "javaClassValue": "A String", # Contains value if the data is of java class type.
2036                    "label": "A String", # An optional label to display in a dax UI for the element.
2037                    "boolValue": True or False, # Contains value if the data is of a boolean type.
2038                    "strValue": "A String", # Contains value if the data is of string type.
2039                    "key": "A String", # The key identifying the display data.
2040                        # This is intended to be used as a label for the display data
2041                        # when viewed in a dax monitoring system.
2042                    "int64Value": "A String", # Contains value if the data is of int64 type.
2043                    "timestampValue": "A String", # Contains value if the data is of timestamp type.
2044                  },
2045                ],
2046                "outputCollectionName": [ # User  names for all collection outputs to this transform.
2047                  "A String",
2048                ],
2049                "id": "A String", # SDK generated id of this transform instance.
2050              },
2051            ],
2052            "displayData": [ # Pipeline level display data.
2053              { # Data provided with a pipeline or transform to provide descriptive info.
2054                "shortStrValue": "A String", # A possible additional shorter value to display.
2055                    # For example a java_class_name_value of com.mypackage.MyDoFn
2056                    # will be stored with MyDoFn as the short_str_value and
2057                    # com.mypackage.MyDoFn as the java_class_name value.
2058                    # short_str_value can be displayed and java_class_name_value
2059                    # will be displayed as a tooltip.
2060                "durationValue": "A String", # Contains value if the data is of duration type.
2061                "url": "A String", # An optional full URL.
2062                "floatValue": 3.14, # Contains value if the data is of float type.
2063                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2064                    # language namespace (i.e. python module) which defines the display data.
2065                    # This allows a dax monitoring system to specially handle the data
2066                    # and perform custom rendering.
2067                "javaClassValue": "A String", # Contains value if the data is of java class type.
2068                "label": "A String", # An optional label to display in a dax UI for the element.
2069                "boolValue": True or False, # Contains value if the data is of a boolean type.
2070                "strValue": "A String", # Contains value if the data is of string type.
2071                "key": "A String", # The key identifying the display data.
2072                    # This is intended to be used as a label for the display data
2073                    # when viewed in a dax monitoring system.
2074                "int64Value": "A String", # Contains value if the data is of int64 type.
2075                "timestampValue": "A String", # Contains value if the data is of timestamp type.
2076              },
2077            ],
2078            "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2079              { # Description of the composing transforms, names/ids, and input/outputs of a
2080                  # stage of execution.  Some composing transforms and sources may have been
2081                  # generated by the Dataflow service during execution planning.
2082                "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2083                  { # Description of an interstitial value between transforms in an execution
2084                      # stage.
2085                    "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2086                    "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2087                        # source is most closely associated.
2088                    "name": "A String", # Dataflow service generated name for this source.
2089                  },
2090                ],
2091                "kind": "A String", # Type of tranform this stage is executing.
2092                "name": "A String", # Dataflow service generated name for this stage.
2093                "outputSource": [ # Output sources for this stage.
2094                  { # Description of an input or output of an execution stage.
2095                    "userName": "A String", # Human-readable name for this source; may be user or system generated.
2096                    "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2097                        # source is most closely associated.
2098                    "name": "A String", # Dataflow service generated name for this source.
2099                    "sizeBytes": "A String", # Size of the source, if measurable.
2100                  },
2101                ],
2102                "inputSource": [ # Input sources for this stage.
2103                  { # Description of an input or output of an execution stage.
2104                    "userName": "A String", # Human-readable name for this source; may be user or system generated.
2105                    "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2106                        # source is most closely associated.
2107                    "name": "A String", # Dataflow service generated name for this source.
2108                    "sizeBytes": "A String", # Size of the source, if measurable.
2109                  },
2110                ],
2111                "componentTransform": [ # Transforms that comprise this execution stage.
2112                  { # Description of a transform executed as part of an execution stage.
2113                    "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2114                    "originalTransform": "A String", # User name for the original user transform with which this transform is
2115                        # most closely associated.
2116                    "name": "A String", # Dataflow service generated name for this source.
2117                  },
2118                ],
2119                "id": "A String", # Dataflow service generated id for this stage.
2120              },
2121            ],
2122          },
2123          "steps": [ # The top-level steps that constitute the entire job.
2124            { # Defines a particular step within a Cloud Dataflow job.
2125                #
2126                # A job consists of multiple steps, each of which performs some
2127                # specific operation as part of the overall job.  Data is typically
2128                # passed from one step to another as part of the job.
2129                #
2130                # Here's an example of a sequence of steps which together implement a
2131                # Map-Reduce job:
2132                #
2133                #   * Read a collection of data from some source, parsing the
2134                #     collection's elements.
2135                #
2136                #   * Validate the elements.
2137                #
2138                #   * Apply a user-defined function to map each element to some value
2139                #     and extract an element-specific key value.
2140                #
2141                #   * Group elements with the same key into a single element with
2142                #     that key, transforming a multiply-keyed collection into a
2143                #     uniquely-keyed collection.
2144                #
2145                #   * Write the elements out to some data sink.
2146                #
2147                # Note that the Cloud Dataflow service may be used to run many different
2148                # types of jobs, not just Map-Reduce.
2149              "kind": "A String", # The kind of step in the Cloud Dataflow job.
2150              "properties": { # Named properties associated with the step. Each kind of
2151                  # predefined step has its own required set of properties.
2152                  # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
2153                "a_key": "", # Properties of the object.
2154              },
2155              "name": "A String", # The name that identifies the step. This must be unique for each
2156                  # step with respect to all other steps in the Cloud Dataflow job.
2157            },
2158          ],
2159          "currentStateTime": "A String", # The timestamp associated with the current state.
2160          "tempFiles": [ # A set of files the system should be aware of that are used
2161              # for temporary storage. These temporary files will be
2162              # removed on job completion.
2163              # No duplicates are allowed.
2164              # No file patterns are supported.
2165              #
2166              # The supported files are:
2167              #
2168              # Google Cloud Storage:
2169              #
2170              #    storage.googleapis.com/{bucket}/{object}
2171              #    bucket.storage.googleapis.com/{object}
2172            "A String",
2173          ],
2174          "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
2175              # callers cannot mutate it.
2176            { # A message describing the state of a particular execution stage.
2177              "executionStageName": "A String", # The name of the execution stage.
2178              "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
2179              "currentStateTime": "A String", # The time at which the stage transitioned to this state.
2180            },
2181          ],
2182          "type": "A String", # The type of Cloud Dataflow job.
2183          "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2184              # Cloud Dataflow service.
2185          "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2186              # of the job it replaced.
2187              #
2188              # When sending a `CreateJobRequest`, you can update a job by specifying it
2189              # here. The job named here is stopped, and its intermediate state is
2190              # transferred to this job.
2191          "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
2192              # isn't contained in the submitted job.
2193            "stages": { # A mapping from each stage to the information about that stage.
2194              "a_key": { # Contains information about how a particular
2195                  # google.dataflow.v1beta3.Step will be executed.
2196                "stepName": [ # The steps associated with the execution stage.
2197                    # Note that stages may have several steps, and that a given step
2198                    # might be run by more than one stage.
2199                  "A String",
2200                ],
2201              },
2202            },
2203          },
2204        },
2205    ],
2206  }</pre>
2207</div>
2208
2209<div class="method">
2210    <code class="details" id="list_next">list_next(previous_request, previous_response)</code>
2211  <pre>Retrieves the next page of results.
2212
2213Args:
2214  previous_request: The request for the previous page. (required)
2215  previous_response: The response from the request for the previous page. (required)
2216
2217Returns:
2218  A request object that you can call 'execute()' on to request the next
2219  page. Returns None if there are no more items in the collection.
2220    </pre>
2221</div>
2222
2223<div class="method">
2224    <code class="details" id="update">update(projectId, jobId, body, location=None, x__xgafv=None)</code>
2225  <pre>Updates the state of an existing Cloud Dataflow job.
2226
2227Args:
2228  projectId: string, The ID of the Cloud Platform project that the job belongs to. (required)
2229  jobId: string, The job ID. (required)
2230  body: object, The request body. (required)
2231    The object takes the form of:
2232
2233{ # Defines a job to be run by the Cloud Dataflow service.
2234    "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2235        # If this field is set, the service will ensure its uniqueness.
2236        # The request to create a job will fail if the service has knowledge of a
2237        # previously submitted job with the same client's ID and job name.
2238        # The caller may use this field to ensure idempotence of job
2239        # creation across retried attempts to create a job.
2240        # By default, the field is empty and, in that case, the service ignores it.
2241    "requestedState": "A String", # The job's requested state.
2242        #
2243        # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2244        # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
2245        # also be used to directly set a job's requested state to
2246        # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2247        # job if it has not already reached a terminal state.
2248    "name": "A String", # The user-specified Cloud Dataflow job name.
2249        #
2250        # Only one Job with a given name may exist in a project at any
2251        # given time. If a caller attempts to create a Job with the same
2252        # name as an already-existing Job, the attempt returns the
2253        # existing Job.
2254        #
2255        # The name must match the regular expression
2256        # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2257    "location": "A String", # The location that contains this job.
2258    "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2259        # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2260    "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2261    "currentState": "A String", # The current state of the job.
2262        #
2263        # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2264        # specified.
2265        #
2266        # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2267        # terminal state. After a job has reached a terminal state, no
2268        # further state updates may be made.
2269        #
2270        # This field may be mutated by the Cloud Dataflow service;
2271        # callers cannot mutate it.
2272    "labels": { # User-defined labels for this job.
2273        #
2274        # The labels map can contain no more than 64 entries.  Entries of the labels
2275        # map are UTF8 strings that comply with the following restrictions:
2276        #
2277        # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
2278        # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2279        # * Both keys and values are additionally constrained to be <= 128 bytes in
2280        # size.
2281      "a_key": "A String",
2282    },
2283    "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2284        # corresponding name prefixes of the new job.
2285      "a_key": "A String",
2286    },
2287    "id": "A String", # The unique ID of this job.
2288        #
2289        # This field is set by the Cloud Dataflow service when the Job is
2290        # created, and is immutable for the life of the job.
2291    "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2292      "version": { # A structure describing which components and their versions of the service
2293          # are required in order to run the job.
2294        "a_key": "", # Properties of the object.
2295      },
2296      "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2297          # storage.  The system will append the suffix "/temp-{JOBNAME} to
2298          # this resource prefix, where {JOBNAME} is the value of the
2299          # job_name field.  The resulting bucket and object prefix is used
2300          # as the prefix of the resources used to store temporary data
2301          # needed during the job execution.  NOTE: This will override the
2302          # value in taskrunner_settings.
2303          # The supported resource type is:
2304          #
2305          # Google Cloud Storage:
2306          #
2307          #   storage.googleapis.com/{bucket}/{object}
2308          #   bucket.storage.googleapis.com/{object}
2309      "internalExperiments": { # Experimental settings.
2310        "a_key": "", # Properties of the object. Contains field @type with type URL.
2311      },
2312      "dataset": "A String", # The dataset for the current project where various workflow
2313          # related tables are stored.
2314          #
2315          # The supported resource type is:
2316          #
2317          # Google BigQuery:
2318          #   bigquery.googleapis.com/{dataset}
2319      "experiments": [ # The list of experiments to enable.
2320        "A String",
2321      ],
2322      "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
2323      "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2324          # options are passed through the service and are used to recreate the
2325          # SDK pipeline options on the worker in a language agnostic and platform
2326          # independent way.
2327        "a_key": "", # Properties of the object.
2328      },
2329      "userAgent": { # A description of the process that generated the request.
2330        "a_key": "", # Properties of the object.
2331      },
2332      "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
2333          # unspecified, the service will attempt to choose a reasonable
2334          # default.  This should be in the form of the API service name,
2335          # e.g. "compute.googleapis.com".
2336      "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2337          # specified in order for the job to have workers.
2338        { # Describes one particular pool of Cloud Dataflow workers to be
2339            # instantiated by the Cloud Dataflow service in order to perform the
2340            # computations required by a job.  Note that a workflow job may use
2341            # multiple pools, in order to match the various computational
2342            # requirements of the various stages of the job.
2343          "diskSourceImage": "A String", # Fully qualified source image for disks.
2344          "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2345              # using the standard Dataflow task runner.  Users should ignore
2346              # this field.
2347            "workflowFileName": "A String", # The file to store the workflow in.
2348            "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
2349                # will not be uploaded.
2350                #
2351                # The supported resource type is:
2352                #
2353                # Google Cloud Storage:
2354                #   storage.googleapis.com/{bucket}/{object}
2355                #   bucket.storage.googleapis.com/{object}
2356            "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2357            "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2358              "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2359              "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2360                  # "shuffle/v1beta1".
2361              "workerId": "A String", # The ID of the worker running this pipeline.
2362              "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2363                  #
2364                  # When workers access Google Cloud APIs, they logically do so via
2365                  # relative URLs.  If this field is specified, it supplies the base
2366                  # URL to use for resolving these relative URLs.  The normative
2367                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2368                  # Locators".
2369                  #
2370                  # If not specified, the default value is "http://www.googleapis.com/"
2371              "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2372                  # "dataflow/v1b3/projects".
2373              "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2374                  # storage.
2375                  #
2376                  # The supported resource type is:
2377                  #
2378                  # Google Cloud Storage:
2379                  #
2380                  #   storage.googleapis.com/{bucket}/{object}
2381                  #   bucket.storage.googleapis.com/{object}
2382            },
2383            "vmId": "A String", # The ID string of the VM.
2384            "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2385            "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2386            "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2387                # access the Cloud Dataflow API.
2388              "A String",
2389            ],
2390            "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2391                # taskrunner; e.g. "root".
2392            "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2393                #
2394                # When workers access Google Cloud APIs, they logically do so via
2395                # relative URLs.  If this field is specified, it supplies the base
2396                # URL to use for resolving these relative URLs.  The normative
2397                # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2398                # Locators".
2399                #
2400                # If not specified, the default value is "http://www.googleapis.com/"
2401            "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2402                # taskrunner; e.g. "wheel".
2403            "languageHint": "A String", # The suggested backend language.
2404            "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2405                # console.
2406            "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2407            "logDir": "A String", # The directory on the VM to store logs.
2408            "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
2409            "harnessCommand": "A String", # The command to launch the worker harness.
2410            "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2411                # temporary storage.
2412                #
2413                # The supported resource type is:
2414                #
2415                # Google Cloud Storage:
2416                #   storage.googleapis.com/{bucket}/{object}
2417                #   bucket.storage.googleapis.com/{object}
2418            "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2419          },
2420          "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2421              # are supported.
2422          "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
2423              # service will attempt to choose a reasonable default.
2424          "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
2425              # the service will use the network "default".
2426          "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
2427              # will attempt to choose a reasonable default.
2428          "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
2429              # attempt to choose a reasonable default.
2430          "dataDisks": [ # Data disks that are used by a VM in this workflow.
2431            { # Describes the data disk used by a workflow job.
2432              "mountPoint": "A String", # Directory in a VM where disk is mounted.
2433              "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
2434                  # attempt to choose a reasonable default.
2435              "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
2436                  # must be a disk type appropriate to the project and zone in which
2437                  # the workers will run.  If unknown or unspecified, the service
2438                  # will attempt to choose a reasonable default.
2439                  #
2440                  # For example, the standard persistent disk type is a resource name
2441                  # typically ending in "pd-standard".  If SSD persistent disks are
2442                  # available, the resource name typically ends with "pd-ssd".  The
2443                  # actual valid values are defined the Google Compute Engine API,
2444                  # not by the Cloud Dataflow API; consult the Google Compute Engine
2445                  # documentation for more information about determining the set of
2446                  # available disk types for a particular project and zone.
2447                  #
2448                  # Google Compute Engine Disk types are local to a particular
2449                  # project in a particular zone, and so the resource name will
2450                  # typically look something like this:
2451                  #
2452                  # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
2453            },
2454          ],
2455          "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
2456              # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2457              # `TEARDOWN_NEVER`.
2458              # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2459              # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2460              # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2461              # down.
2462              #
2463              # If the workers are not torn down by the service, they will
2464              # continue to run and use Google Compute Engine VM resources in the
2465              # user's project until they are explicitly terminated by the user.
2466              # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2467              # policy except for small, manually supervised test jobs.
2468              #
2469              # If unknown or unspecified, the service will attempt to choose a reasonable
2470              # default.
2471          "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2472              # Compute Engine API.
2473          "ipConfiguration": "A String", # Configuration for VM IPs.
2474          "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2475              # service will choose a number of threads (according to the number of cores
2476              # on the selected machine type for batch, or 1 by convention for streaming).
2477          "poolArgs": { # Extra arguments for this worker pool.
2478            "a_key": "", # Properties of the object. Contains field @type with type URL.
2479          },
2480          "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2481              # execute the job.  If zero or unspecified, the service will
2482              # attempt to choose a reasonable default.
2483          "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2484              # harness, residing in Google Container Registry.
2485          "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
2486              # the form "regions/REGION/subnetworks/SUBNETWORK".
2487          "packages": [ # Packages to be installed on workers.
2488            { # The packages that must be installed in order for a worker to run the
2489                # steps of the Cloud Dataflow job that will be assigned to its worker
2490                # pool.
2491                #
2492                # This is the mechanism by which the Cloud Dataflow SDK causes code to
2493                # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2494                # might use this to install jars containing the user's code and all of the
2495                # various dependencies (libraries, data files, etc.) required in order
2496                # for that code to run.
2497              "location": "A String", # The resource to read the package from. The supported resource type is:
2498                  #
2499                  # Google Cloud Storage:
2500                  #
2501                  #   storage.googleapis.com/{bucket}
2502                  #   bucket.storage.googleapis.com/
2503              "name": "A String", # The name of the package.
2504            },
2505          ],
2506          "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
2507            "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
2508            "algorithm": "A String", # The algorithm to use for autoscaling.
2509          },
2510          "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
2511              # select a default set of packages which are useful to worker
2512              # harnesses written in a particular language.
2513          "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
2514              # attempt to choose a reasonable default.
2515          "metadata": { # Metadata to set on the Google Compute Engine VMs.
2516            "a_key": "A String",
2517          },
2518        },
2519      ],
2520    },
2521    "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
2522        # A description of the user pipeline and stages through which it is executed.
2523        # Created by Cloud Dataflow service.  Only retrieved with
2524        # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
2525        # form.  This data is provided by the Dataflow service for ease of visualizing
2526        # the pipeline and interpretting Dataflow provided metrics.
2527      "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
2528        { # Description of the type, names/ids, and input/outputs for a transform.
2529          "kind": "A String", # Type of transform.
2530          "name": "A String", # User provided name for this transform instance.
2531          "inputCollectionName": [ # User names for all collection inputs to this transform.
2532            "A String",
2533          ],
2534          "displayData": [ # Transform-specific display data.
2535            { # Data provided with a pipeline or transform to provide descriptive info.
2536              "shortStrValue": "A String", # A possible additional shorter value to display.
2537                  # For example a java_class_name_value of com.mypackage.MyDoFn
2538                  # will be stored with MyDoFn as the short_str_value and
2539                  # com.mypackage.MyDoFn as the java_class_name value.
2540                  # short_str_value can be displayed and java_class_name_value
2541                  # will be displayed as a tooltip.
2542              "durationValue": "A String", # Contains value if the data is of duration type.
2543              "url": "A String", # An optional full URL.
2544              "floatValue": 3.14, # Contains value if the data is of float type.
2545              "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2546                  # language namespace (i.e. python module) which defines the display data.
2547                  # This allows a dax monitoring system to specially handle the data
2548                  # and perform custom rendering.
2549              "javaClassValue": "A String", # Contains value if the data is of java class type.
2550              "label": "A String", # An optional label to display in a dax UI for the element.
2551              "boolValue": True or False, # Contains value if the data is of a boolean type.
2552              "strValue": "A String", # Contains value if the data is of string type.
2553              "key": "A String", # The key identifying the display data.
2554                  # This is intended to be used as a label for the display data
2555                  # when viewed in a dax monitoring system.
2556              "int64Value": "A String", # Contains value if the data is of int64 type.
2557              "timestampValue": "A String", # Contains value if the data is of timestamp type.
2558            },
2559          ],
2560          "outputCollectionName": [ # User  names for all collection outputs to this transform.
2561            "A String",
2562          ],
2563          "id": "A String", # SDK generated id of this transform instance.
2564        },
2565      ],
2566      "displayData": [ # Pipeline level display data.
2567        { # Data provided with a pipeline or transform to provide descriptive info.
2568          "shortStrValue": "A String", # A possible additional shorter value to display.
2569              # For example a java_class_name_value of com.mypackage.MyDoFn
2570              # will be stored with MyDoFn as the short_str_value and
2571              # com.mypackage.MyDoFn as the java_class_name value.
2572              # short_str_value can be displayed and java_class_name_value
2573              # will be displayed as a tooltip.
2574          "durationValue": "A String", # Contains value if the data is of duration type.
2575          "url": "A String", # An optional full URL.
2576          "floatValue": 3.14, # Contains value if the data is of float type.
2577          "namespace": "A String", # The namespace for the key. This is usually a class name or programming
2578              # language namespace (i.e. python module) which defines the display data.
2579              # This allows a dax monitoring system to specially handle the data
2580              # and perform custom rendering.
2581          "javaClassValue": "A String", # Contains value if the data is of java class type.
2582          "label": "A String", # An optional label to display in a dax UI for the element.
2583          "boolValue": True or False, # Contains value if the data is of a boolean type.
2584          "strValue": "A String", # Contains value if the data is of string type.
2585          "key": "A String", # The key identifying the display data.
2586              # This is intended to be used as a label for the display data
2587              # when viewed in a dax monitoring system.
2588          "int64Value": "A String", # Contains value if the data is of int64 type.
2589          "timestampValue": "A String", # Contains value if the data is of timestamp type.
2590        },
2591      ],
2592      "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
2593        { # Description of the composing transforms, names/ids, and input/outputs of a
2594            # stage of execution.  Some composing transforms and sources may have been
2595            # generated by the Dataflow service during execution planning.
2596          "componentSource": [ # Collections produced and consumed by component transforms of this stage.
2597            { # Description of an interstitial value between transforms in an execution
2598                # stage.
2599              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2600              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2601                  # source is most closely associated.
2602              "name": "A String", # Dataflow service generated name for this source.
2603            },
2604          ],
2605          "kind": "A String", # Type of tranform this stage is executing.
2606          "name": "A String", # Dataflow service generated name for this stage.
2607          "outputSource": [ # Output sources for this stage.
2608            { # Description of an input or output of an execution stage.
2609              "userName": "A String", # Human-readable name for this source; may be user or system generated.
2610              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2611                  # source is most closely associated.
2612              "name": "A String", # Dataflow service generated name for this source.
2613              "sizeBytes": "A String", # Size of the source, if measurable.
2614            },
2615          ],
2616          "inputSource": [ # Input sources for this stage.
2617            { # Description of an input or output of an execution stage.
2618              "userName": "A String", # Human-readable name for this source; may be user or system generated.
2619              "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
2620                  # source is most closely associated.
2621              "name": "A String", # Dataflow service generated name for this source.
2622              "sizeBytes": "A String", # Size of the source, if measurable.
2623            },
2624          ],
2625          "componentTransform": [ # Transforms that comprise this execution stage.
2626            { # Description of a transform executed as part of an execution stage.
2627              "userName": "A String", # Human-readable name for this transform; may be user or system generated.
2628              "originalTransform": "A String", # User name for the original user transform with which this transform is
2629                  # most closely associated.
2630              "name": "A String", # Dataflow service generated name for this source.
2631            },
2632          ],
2633          "id": "A String", # Dataflow service generated id for this stage.
2634        },
2635      ],
2636    },
2637    "steps": [ # The top-level steps that constitute the entire job.
2638      { # Defines a particular step within a Cloud Dataflow job.
2639          #
2640          # A job consists of multiple steps, each of which performs some
2641          # specific operation as part of the overall job.  Data is typically
2642          # passed from one step to another as part of the job.
2643          #
2644          # Here's an example of a sequence of steps which together implement a
2645          # Map-Reduce job:
2646          #
2647          #   * Read a collection of data from some source, parsing the
2648          #     collection's elements.
2649          #
2650          #   * Validate the elements.
2651          #
2652          #   * Apply a user-defined function to map each element to some value
2653          #     and extract an element-specific key value.
2654          #
2655          #   * Group elements with the same key into a single element with
2656          #     that key, transforming a multiply-keyed collection into a
2657          #     uniquely-keyed collection.
2658          #
2659          #   * Write the elements out to some data sink.
2660          #
2661          # Note that the Cloud Dataflow service may be used to run many different
2662          # types of jobs, not just Map-Reduce.
2663        "kind": "A String", # The kind of step in the Cloud Dataflow job.
2664        "properties": { # Named properties associated with the step. Each kind of
2665            # predefined step has its own required set of properties.
2666            # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
2667          "a_key": "", # Properties of the object.
2668        },
2669        "name": "A String", # The name that identifies the step. This must be unique for each
2670            # step with respect to all other steps in the Cloud Dataflow job.
2671      },
2672    ],
2673    "currentStateTime": "A String", # The timestamp associated with the current state.
2674    "tempFiles": [ # A set of files the system should be aware of that are used
2675        # for temporary storage. These temporary files will be
2676        # removed on job completion.
2677        # No duplicates are allowed.
2678        # No file patterns are supported.
2679        #
2680        # The supported files are:
2681        #
2682        # Google Cloud Storage:
2683        #
2684        #    storage.googleapis.com/{bucket}/{object}
2685        #    bucket.storage.googleapis.com/{object}
2686      "A String",
2687    ],
2688    "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
2689        # callers cannot mutate it.
2690      { # A message describing the state of a particular execution stage.
2691        "executionStageName": "A String", # The name of the execution stage.
2692        "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
2693        "currentStateTime": "A String", # The time at which the stage transitioned to this state.
2694      },
2695    ],
2696    "type": "A String", # The type of Cloud Dataflow job.
2697    "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
2698        # Cloud Dataflow service.
2699    "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
2700        # of the job it replaced.
2701        #
2702        # When sending a `CreateJobRequest`, you can update a job by specifying it
2703        # here. The job named here is stopped, and its intermediate state is
2704        # transferred to this job.
2705    "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
2706        # isn't contained in the submitted job.
2707      "stages": { # A mapping from each stage to the information about that stage.
2708        "a_key": { # Contains information about how a particular
2709            # google.dataflow.v1beta3.Step will be executed.
2710          "stepName": [ # The steps associated with the execution stage.
2711              # Note that stages may have several steps, and that a given step
2712              # might be run by more than one stage.
2713            "A String",
2714          ],
2715        },
2716      },
2717    },
2718  }
2719
2720  location: string, The location that contains this job.
2721  x__xgafv: string, V1 error format.
2722    Allowed values
2723      1 - v1 error format
2724      2 - v2 error format
2725
2726Returns:
2727  An object of the form:
2728
2729    { # Defines a job to be run by the Cloud Dataflow service.
2730      "clientRequestId": "A String", # The client's unique identifier of the job, re-used across retried attempts.
2731          # If this field is set, the service will ensure its uniqueness.
2732          # The request to create a job will fail if the service has knowledge of a
2733          # previously submitted job with the same client's ID and job name.
2734          # The caller may use this field to ensure idempotence of job
2735          # creation across retried attempts to create a job.
2736          # By default, the field is empty and, in that case, the service ignores it.
2737      "requestedState": "A String", # The job's requested state.
2738          #
2739          # `UpdateJob` may be used to switch between the `JOB_STATE_STOPPED` and
2740          # `JOB_STATE_RUNNING` states, by setting requested_state.  `UpdateJob` may
2741          # also be used to directly set a job's requested state to
2742          # `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably terminating the
2743          # job if it has not already reached a terminal state.
2744      "name": "A String", # The user-specified Cloud Dataflow job name.
2745          #
2746          # Only one Job with a given name may exist in a project at any
2747          # given time. If a caller attempts to create a Job with the same
2748          # name as an already-existing Job, the attempt returns the
2749          # existing Job.
2750          #
2751          # The name must match the regular expression
2752          # `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
2753      "location": "A String", # The location that contains this job.
2754      "replacedByJobId": "A String", # If another job is an update of this job (and thus, this job is in
2755          # `JOB_STATE_UPDATED`), this field contains the ID of that job.
2756      "projectId": "A String", # The ID of the Cloud Platform project that the job belongs to.
2757      "currentState": "A String", # The current state of the job.
2758          #
2759          # Jobs are created in the `JOB_STATE_STOPPED` state unless otherwise
2760          # specified.
2761          #
2762          # A job in the `JOB_STATE_RUNNING` state may asynchronously enter a
2763          # terminal state. After a job has reached a terminal state, no
2764          # further state updates may be made.
2765          #
2766          # This field may be mutated by the Cloud Dataflow service;
2767          # callers cannot mutate it.
2768      "labels": { # User-defined labels for this job.
2769          #
2770          # The labels map can contain no more than 64 entries.  Entries of the labels
2771          # map are UTF8 strings that comply with the following restrictions:
2772          #
2773          # * Keys must conform to regexp:  \p{Ll}\p{Lo}{0,62}
2774          # * Values must conform to regexp:  [\p{Ll}\p{Lo}\p{N}_-]{0,63}
2775          # * Both keys and values are additionally constrained to be <= 128 bytes in
2776          # size.
2777        "a_key": "A String",
2778      },
2779      "transformNameMapping": { # The map of transform name prefixes of the job to be replaced to the
2780          # corresponding name prefixes of the new job.
2781        "a_key": "A String",
2782      },
2783      "id": "A String", # The unique ID of this job.
2784          #
2785          # This field is set by the Cloud Dataflow service when the Job is
2786          # created, and is immutable for the life of the job.
2787      "environment": { # Describes the environment in which a Dataflow Job runs. # The environment for the job.
2788        "version": { # A structure describing which components and their versions of the service
2789            # are required in order to run the job.
2790          "a_key": "", # Properties of the object.
2791        },
2792        "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2793            # storage.  The system will append the suffix "/temp-{JOBNAME} to
2794            # this resource prefix, where {JOBNAME} is the value of the
2795            # job_name field.  The resulting bucket and object prefix is used
2796            # as the prefix of the resources used to store temporary data
2797            # needed during the job execution.  NOTE: This will override the
2798            # value in taskrunner_settings.
2799            # The supported resource type is:
2800            #
2801            # Google Cloud Storage:
2802            #
2803            #   storage.googleapis.com/{bucket}/{object}
2804            #   bucket.storage.googleapis.com/{object}
2805        "internalExperiments": { # Experimental settings.
2806          "a_key": "", # Properties of the object. Contains field @type with type URL.
2807        },
2808        "dataset": "A String", # The dataset for the current project where various workflow
2809            # related tables are stored.
2810            #
2811            # The supported resource type is:
2812            #
2813            # Google BigQuery:
2814            #   bigquery.googleapis.com/{dataset}
2815        "experiments": [ # The list of experiments to enable.
2816          "A String",
2817        ],
2818        "serviceAccountEmail": "A String", # Identity to run virtual machines as. Defaults to the default account.
2819        "sdkPipelineOptions": { # The Cloud Dataflow SDK pipeline options specified by the user. These
2820            # options are passed through the service and are used to recreate the
2821            # SDK pipeline options on the worker in a language agnostic and platform
2822            # independent way.
2823          "a_key": "", # Properties of the object.
2824        },
2825        "userAgent": { # A description of the process that generated the request.
2826          "a_key": "", # Properties of the object.
2827        },
2828        "clusterManagerApiService": "A String", # The type of cluster manager API to use.  If unknown or
2829            # unspecified, the service will attempt to choose a reasonable
2830            # default.  This should be in the form of the API service name,
2831            # e.g. "compute.googleapis.com".
2832        "workerPools": [ # The worker pools. At least one "harness" worker pool must be
2833            # specified in order for the job to have workers.
2834          { # Describes one particular pool of Cloud Dataflow workers to be
2835              # instantiated by the Cloud Dataflow service in order to perform the
2836              # computations required by a job.  Note that a workflow job may use
2837              # multiple pools, in order to match the various computational
2838              # requirements of the various stages of the job.
2839            "diskSourceImage": "A String", # Fully qualified source image for disks.
2840            "taskrunnerSettings": { # Taskrunner configuration settings. # Settings passed through to Google Compute Engine workers when
2841                # using the standard Dataflow task runner.  Users should ignore
2842                # this field.
2843              "workflowFileName": "A String", # The file to store the workflow in.
2844              "logUploadLocation": "A String", # Indicates where to put logs.  If this is not specified, the logs
2845                  # will not be uploaded.
2846                  #
2847                  # The supported resource type is:
2848                  #
2849                  # Google Cloud Storage:
2850                  #   storage.googleapis.com/{bucket}/{object}
2851                  #   bucket.storage.googleapis.com/{object}
2852              "commandlinesFileName": "A String", # The file to store preprocessing commands in.
2853              "parallelWorkerSettings": { # Provides data to pass through to the worker harness. # The settings to pass to the parallel worker harness.
2854                "reportingEnabled": True or False, # Whether to send work progress updates to the service.
2855                "shuffleServicePath": "A String", # The Shuffle service path relative to the root URL, for example,
2856                    # "shuffle/v1beta1".
2857                "workerId": "A String", # The ID of the worker running this pipeline.
2858                "baseUrl": "A String", # The base URL for accessing Google Cloud APIs.
2859                    #
2860                    # When workers access Google Cloud APIs, they logically do so via
2861                    # relative URLs.  If this field is specified, it supplies the base
2862                    # URL to use for resolving these relative URLs.  The normative
2863                    # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2864                    # Locators".
2865                    #
2866                    # If not specified, the default value is "http://www.googleapis.com/"
2867                "servicePath": "A String", # The Cloud Dataflow service path relative to the root URL, for example,
2868                    # "dataflow/v1b3/projects".
2869                "tempStoragePrefix": "A String", # The prefix of the resources the system should use for temporary
2870                    # storage.
2871                    #
2872                    # The supported resource type is:
2873                    #
2874                    # Google Cloud Storage:
2875                    #
2876                    #   storage.googleapis.com/{bucket}/{object}
2877                    #   bucket.storage.googleapis.com/{object}
2878              },
2879              "vmId": "A String", # The ID string of the VM.
2880              "baseTaskDir": "A String", # The location on the worker for task-specific subdirectories.
2881              "continueOnException": True or False, # Whether to continue taskrunner if an exception is hit.
2882              "oauthScopes": [ # The OAuth2 scopes to be requested by the taskrunner in order to
2883                  # access the Cloud Dataflow API.
2884                "A String",
2885              ],
2886              "taskUser": "A String", # The UNIX user ID on the worker VM to use for tasks launched by
2887                  # taskrunner; e.g. "root".
2888              "baseUrl": "A String", # The base URL for the taskrunner to use when accessing Google Cloud APIs.
2889                  #
2890                  # When workers access Google Cloud APIs, they logically do so via
2891                  # relative URLs.  If this field is specified, it supplies the base
2892                  # URL to use for resolving these relative URLs.  The normative
2893                  # algorithm used is defined by RFC 1808, "Relative Uniform Resource
2894                  # Locators".
2895                  #
2896                  # If not specified, the default value is "http://www.googleapis.com/"
2897              "taskGroup": "A String", # The UNIX group ID on the worker VM to use for tasks launched by
2898                  # taskrunner; e.g. "wheel".
2899              "languageHint": "A String", # The suggested backend language.
2900              "logToSerialconsole": True or False, # Whether to send taskrunner log info to Google Compute Engine VM serial
2901                  # console.
2902              "streamingWorkerMainClass": "A String", # The streaming worker main class name.
2903              "logDir": "A String", # The directory on the VM to store logs.
2904              "dataflowApiVersion": "A String", # The API version of endpoint, e.g. "v1b3"
2905              "harnessCommand": "A String", # The command to launch the worker harness.
2906              "tempStoragePrefix": "A String", # The prefix of the resources the taskrunner should use for
2907                  # temporary storage.
2908                  #
2909                  # The supported resource type is:
2910                  #
2911                  # Google Cloud Storage:
2912                  #   storage.googleapis.com/{bucket}/{object}
2913                  #   bucket.storage.googleapis.com/{object}
2914              "alsologtostderr": True or False, # Whether to also send taskrunner log info to stderr.
2915            },
2916            "kind": "A String", # The kind of the worker pool; currently only `harness` and `shuffle`
2917                # are supported.
2918            "machineType": "A String", # Machine type (e.g. "n1-standard-1").  If empty or unspecified, the
2919                # service will attempt to choose a reasonable default.
2920            "network": "A String", # Network to which VMs will be assigned.  If empty or unspecified,
2921                # the service will use the network "default".
2922            "zone": "A String", # Zone to run the worker pools in.  If empty or unspecified, the service
2923                # will attempt to choose a reasonable default.
2924            "diskSizeGb": 42, # Size of root disk for VMs, in GB.  If zero or unspecified, the service will
2925                # attempt to choose a reasonable default.
2926            "dataDisks": [ # Data disks that are used by a VM in this workflow.
2927              { # Describes the data disk used by a workflow job.
2928                "mountPoint": "A String", # Directory in a VM where disk is mounted.
2929                "sizeGb": 42, # Size of disk in GB.  If zero or unspecified, the service will
2930                    # attempt to choose a reasonable default.
2931                "diskType": "A String", # Disk storage type, as defined by Google Compute Engine.  This
2932                    # must be a disk type appropriate to the project and zone in which
2933                    # the workers will run.  If unknown or unspecified, the service
2934                    # will attempt to choose a reasonable default.
2935                    #
2936                    # For example, the standard persistent disk type is a resource name
2937                    # typically ending in "pd-standard".  If SSD persistent disks are
2938                    # available, the resource name typically ends with "pd-ssd".  The
2939                    # actual valid values are defined the Google Compute Engine API,
2940                    # not by the Cloud Dataflow API; consult the Google Compute Engine
2941                    # documentation for more information about determining the set of
2942                    # available disk types for a particular project and zone.
2943                    #
2944                    # Google Compute Engine Disk types are local to a particular
2945                    # project in a particular zone, and so the resource name will
2946                    # typically look something like this:
2947                    #
2948                    # compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
2949              },
2950            ],
2951            "teardownPolicy": "A String", # Sets the policy for determining when to turndown worker pool.
2952                # Allowed values are: `TEARDOWN_ALWAYS`, `TEARDOWN_ON_SUCCESS`, and
2953                # `TEARDOWN_NEVER`.
2954                # `TEARDOWN_ALWAYS` means workers are always torn down regardless of whether
2955                # the job succeeds. `TEARDOWN_ON_SUCCESS` means workers are torn down
2956                # if the job succeeds. `TEARDOWN_NEVER` means the workers are never torn
2957                # down.
2958                #
2959                # If the workers are not torn down by the service, they will
2960                # continue to run and use Google Compute Engine VM resources in the
2961                # user's project until they are explicitly terminated by the user.
2962                # Because of this, Google recommends using the `TEARDOWN_ALWAYS`
2963                # policy except for small, manually supervised test jobs.
2964                #
2965                # If unknown or unspecified, the service will attempt to choose a reasonable
2966                # default.
2967            "onHostMaintenance": "A String", # The action to take on host maintenance, as defined by the Google
2968                # Compute Engine API.
2969            "ipConfiguration": "A String", # Configuration for VM IPs.
2970            "numThreadsPerWorker": 42, # The number of threads per worker harness. If empty or unspecified, the
2971                # service will choose a number of threads (according to the number of cores
2972                # on the selected machine type for batch, or 1 by convention for streaming).
2973            "poolArgs": { # Extra arguments for this worker pool.
2974              "a_key": "", # Properties of the object. Contains field @type with type URL.
2975            },
2976            "numWorkers": 42, # Number of Google Compute Engine workers in this pool needed to
2977                # execute the job.  If zero or unspecified, the service will
2978                # attempt to choose a reasonable default.
2979            "workerHarnessContainerImage": "A String", # Required. Docker container image that executes the Cloud Dataflow worker
2980                # harness, residing in Google Container Registry.
2981            "subnetwork": "A String", # Subnetwork to which VMs will be assigned, if desired.  Expected to be of
2982                # the form "regions/REGION/subnetworks/SUBNETWORK".
2983            "packages": [ # Packages to be installed on workers.
2984              { # The packages that must be installed in order for a worker to run the
2985                  # steps of the Cloud Dataflow job that will be assigned to its worker
2986                  # pool.
2987                  #
2988                  # This is the mechanism by which the Cloud Dataflow SDK causes code to
2989                  # be loaded onto the workers. For example, the Cloud Dataflow Java SDK
2990                  # might use this to install jars containing the user's code and all of the
2991                  # various dependencies (libraries, data files, etc.) required in order
2992                  # for that code to run.
2993                "location": "A String", # The resource to read the package from. The supported resource type is:
2994                    #
2995                    # Google Cloud Storage:
2996                    #
2997                    #   storage.googleapis.com/{bucket}
2998                    #   bucket.storage.googleapis.com/
2999                "name": "A String", # The name of the package.
3000              },
3001            ],
3002            "autoscalingSettings": { # Settings for WorkerPool autoscaling. # Settings for autoscaling of this WorkerPool.
3003              "maxNumWorkers": 42, # The maximum number of workers to cap scaling at.
3004              "algorithm": "A String", # The algorithm to use for autoscaling.
3005            },
3006            "defaultPackageSet": "A String", # The default package set to install.  This allows the service to
3007                # select a default set of packages which are useful to worker
3008                # harnesses written in a particular language.
3009            "diskType": "A String", # Type of root disk for VMs.  If empty or unspecified, the service will
3010                # attempt to choose a reasonable default.
3011            "metadata": { # Metadata to set on the Google Compute Engine VMs.
3012              "a_key": "A String",
3013            },
3014          },
3015        ],
3016      },
3017      "pipelineDescription": { # A descriptive representation of submitted pipeline as well as the executed # Preliminary field: The format of this data may change at any time.
3018          # A description of the user pipeline and stages through which it is executed.
3019          # Created by Cloud Dataflow service.  Only retrieved with
3020          # JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
3021          # form.  This data is provided by the Dataflow service for ease of visualizing
3022          # the pipeline and interpretting Dataflow provided metrics.
3023        "originalPipelineTransform": [ # Description of each transform in the pipeline and collections between them.
3024          { # Description of the type, names/ids, and input/outputs for a transform.
3025            "kind": "A String", # Type of transform.
3026            "name": "A String", # User provided name for this transform instance.
3027            "inputCollectionName": [ # User names for all collection inputs to this transform.
3028              "A String",
3029            ],
3030            "displayData": [ # Transform-specific display data.
3031              { # Data provided with a pipeline or transform to provide descriptive info.
3032                "shortStrValue": "A String", # A possible additional shorter value to display.
3033                    # For example a java_class_name_value of com.mypackage.MyDoFn
3034                    # will be stored with MyDoFn as the short_str_value and
3035                    # com.mypackage.MyDoFn as the java_class_name value.
3036                    # short_str_value can be displayed and java_class_name_value
3037                    # will be displayed as a tooltip.
3038                "durationValue": "A String", # Contains value if the data is of duration type.
3039                "url": "A String", # An optional full URL.
3040                "floatValue": 3.14, # Contains value if the data is of float type.
3041                "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3042                    # language namespace (i.e. python module) which defines the display data.
3043                    # This allows a dax monitoring system to specially handle the data
3044                    # and perform custom rendering.
3045                "javaClassValue": "A String", # Contains value if the data is of java class type.
3046                "label": "A String", # An optional label to display in a dax UI for the element.
3047                "boolValue": True or False, # Contains value if the data is of a boolean type.
3048                "strValue": "A String", # Contains value if the data is of string type.
3049                "key": "A String", # The key identifying the display data.
3050                    # This is intended to be used as a label for the display data
3051                    # when viewed in a dax monitoring system.
3052                "int64Value": "A String", # Contains value if the data is of int64 type.
3053                "timestampValue": "A String", # Contains value if the data is of timestamp type.
3054              },
3055            ],
3056            "outputCollectionName": [ # User  names for all collection outputs to this transform.
3057              "A String",
3058            ],
3059            "id": "A String", # SDK generated id of this transform instance.
3060          },
3061        ],
3062        "displayData": [ # Pipeline level display data.
3063          { # Data provided with a pipeline or transform to provide descriptive info.
3064            "shortStrValue": "A String", # A possible additional shorter value to display.
3065                # For example a java_class_name_value of com.mypackage.MyDoFn
3066                # will be stored with MyDoFn as the short_str_value and
3067                # com.mypackage.MyDoFn as the java_class_name value.
3068                # short_str_value can be displayed and java_class_name_value
3069                # will be displayed as a tooltip.
3070            "durationValue": "A String", # Contains value if the data is of duration type.
3071            "url": "A String", # An optional full URL.
3072            "floatValue": 3.14, # Contains value if the data is of float type.
3073            "namespace": "A String", # The namespace for the key. This is usually a class name or programming
3074                # language namespace (i.e. python module) which defines the display data.
3075                # This allows a dax monitoring system to specially handle the data
3076                # and perform custom rendering.
3077            "javaClassValue": "A String", # Contains value if the data is of java class type.
3078            "label": "A String", # An optional label to display in a dax UI for the element.
3079            "boolValue": True or False, # Contains value if the data is of a boolean type.
3080            "strValue": "A String", # Contains value if the data is of string type.
3081            "key": "A String", # The key identifying the display data.
3082                # This is intended to be used as a label for the display data
3083                # when viewed in a dax monitoring system.
3084            "int64Value": "A String", # Contains value if the data is of int64 type.
3085            "timestampValue": "A String", # Contains value if the data is of timestamp type.
3086          },
3087        ],
3088        "executionPipelineStage": [ # Description of each stage of execution of the pipeline.
3089          { # Description of the composing transforms, names/ids, and input/outputs of a
3090              # stage of execution.  Some composing transforms and sources may have been
3091              # generated by the Dataflow service during execution planning.
3092            "componentSource": [ # Collections produced and consumed by component transforms of this stage.
3093              { # Description of an interstitial value between transforms in an execution
3094                  # stage.
3095                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3096                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3097                    # source is most closely associated.
3098                "name": "A String", # Dataflow service generated name for this source.
3099              },
3100            ],
3101            "kind": "A String", # Type of tranform this stage is executing.
3102            "name": "A String", # Dataflow service generated name for this stage.
3103            "outputSource": [ # Output sources for this stage.
3104              { # Description of an input or output of an execution stage.
3105                "userName": "A String", # Human-readable name for this source; may be user or system generated.
3106                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3107                    # source is most closely associated.
3108                "name": "A String", # Dataflow service generated name for this source.
3109                "sizeBytes": "A String", # Size of the source, if measurable.
3110              },
3111            ],
3112            "inputSource": [ # Input sources for this stage.
3113              { # Description of an input or output of an execution stage.
3114                "userName": "A String", # Human-readable name for this source; may be user or system generated.
3115                "originalTransformOrCollection": "A String", # User name for the original user transform or collection with which this
3116                    # source is most closely associated.
3117                "name": "A String", # Dataflow service generated name for this source.
3118                "sizeBytes": "A String", # Size of the source, if measurable.
3119              },
3120            ],
3121            "componentTransform": [ # Transforms that comprise this execution stage.
3122              { # Description of a transform executed as part of an execution stage.
3123                "userName": "A String", # Human-readable name for this transform; may be user or system generated.
3124                "originalTransform": "A String", # User name for the original user transform with which this transform is
3125                    # most closely associated.
3126                "name": "A String", # Dataflow service generated name for this source.
3127              },
3128            ],
3129            "id": "A String", # Dataflow service generated id for this stage.
3130          },
3131        ],
3132      },
3133      "steps": [ # The top-level steps that constitute the entire job.
3134        { # Defines a particular step within a Cloud Dataflow job.
3135            #
3136            # A job consists of multiple steps, each of which performs some
3137            # specific operation as part of the overall job.  Data is typically
3138            # passed from one step to another as part of the job.
3139            #
3140            # Here's an example of a sequence of steps which together implement a
3141            # Map-Reduce job:
3142            #
3143            #   * Read a collection of data from some source, parsing the
3144            #     collection's elements.
3145            #
3146            #   * Validate the elements.
3147            #
3148            #   * Apply a user-defined function to map each element to some value
3149            #     and extract an element-specific key value.
3150            #
3151            #   * Group elements with the same key into a single element with
3152            #     that key, transforming a multiply-keyed collection into a
3153            #     uniquely-keyed collection.
3154            #
3155            #   * Write the elements out to some data sink.
3156            #
3157            # Note that the Cloud Dataflow service may be used to run many different
3158            # types of jobs, not just Map-Reduce.
3159          "kind": "A String", # The kind of step in the Cloud Dataflow job.
3160          "properties": { # Named properties associated with the step. Each kind of
3161              # predefined step has its own required set of properties.
3162              # Must be provided on Create.  Only retrieved with JOB_VIEW_ALL.
3163            "a_key": "", # Properties of the object.
3164          },
3165          "name": "A String", # The name that identifies the step. This must be unique for each
3166              # step with respect to all other steps in the Cloud Dataflow job.
3167        },
3168      ],
3169      "currentStateTime": "A String", # The timestamp associated with the current state.
3170      "tempFiles": [ # A set of files the system should be aware of that are used
3171          # for temporary storage. These temporary files will be
3172          # removed on job completion.
3173          # No duplicates are allowed.
3174          # No file patterns are supported.
3175          #
3176          # The supported files are:
3177          #
3178          # Google Cloud Storage:
3179          #
3180          #    storage.googleapis.com/{bucket}/{object}
3181          #    bucket.storage.googleapis.com/{object}
3182        "A String",
3183      ],
3184      "stageStates": [ # This field may be mutated by the Cloud Dataflow service;
3185          # callers cannot mutate it.
3186        { # A message describing the state of a particular execution stage.
3187          "executionStageName": "A String", # The name of the execution stage.
3188          "executionStageState": "A String", # Executions stage states allow the same set of values as JobState.
3189          "currentStateTime": "A String", # The time at which the stage transitioned to this state.
3190        },
3191      ],
3192      "type": "A String", # The type of Cloud Dataflow job.
3193      "createTime": "A String", # The timestamp when the job was initially created. Immutable and set by the
3194          # Cloud Dataflow service.
3195      "replaceJobId": "A String", # If this job is an update of an existing job, this field is the job ID
3196          # of the job it replaced.
3197          #
3198          # When sending a `CreateJobRequest`, you can update a job by specifying it
3199          # here. The job named here is stopped, and its intermediate state is
3200          # transferred to this job.
3201      "executionInfo": { # Additional information about how a Cloud Dataflow job will be executed that # Deprecated.
3202          # isn't contained in the submitted job.
3203        "stages": { # A mapping from each stage to the information about that stage.
3204          "a_key": { # Contains information about how a particular
3205              # google.dataflow.v1beta3.Step will be executed.
3206            "stepName": [ # The steps associated with the execution stage.
3207                # Note that stages may have several steps, and that a given step
3208                # might be run by more than one stage.
3209              "A String",
3210            ],
3211          },
3212        },
3213      },
3214    }</pre>
3215</div>
3216
3217</body></html>