<html><body>
<style>

body, h1, h2, h3, div, span, p, pre, a {
  margin: 0;
  padding: 0;
  border: 0;
  font-weight: inherit;
  font-style: inherit;
  font-size: 100%;
  font-family: inherit;
  vertical-align: baseline;
}

body {
  font-size: 13px;
  padding: 1em;
}

h1 {
  font-size: 26px;
  margin-bottom: 1em;
}

h2 {
  font-size: 24px;
  margin-bottom: 1em;
}

h3 {
  font-size: 20px;
  margin-bottom: 1em;
  margin-top: 1em;
}

pre, code {
  line-height: 1.5;
  font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}

pre {
  margin-top: 0.5em;
}

h1, h2, h3, p {
  font-family: Arial, sans-serif;
}

h1, h2, h3 {
  border-bottom: solid #CCC 1px;
}

.toc_element {
  margin-top: 0.5em;
}

.firstline {
  margin-left: 2em;
}

.method {
  margin-top: 1em;
  border: solid 1px #CCC;
  padding: 1em;
  background: #EEE;
}

.details {
  font-weight: bold;
  font-size: 14px;
}

</style>

<h1><a href="dataflow_v1b3.html">Google Dataflow API</a> . <a href="dataflow_v1b3.projects.html">projects</a> . <a href="dataflow_v1b3.projects.locations.html">locations</a> . <a href="dataflow_v1b3.projects.locations.jobs.html">jobs</a> . <a href="dataflow_v1b3.projects.locations.jobs.workItems.html">workItems</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
  <code><a href="#lease">lease(projectId, location, jobId, body, x__xgafv=None)</a></code></p>
<p class="firstline">Leases a dataflow WorkItem to run.</p>
<p class="toc_element">
  <code><a href="#reportStatus">reportStatus(projectId, location, jobId, body, x__xgafv=None)</a></code></p>
<p class="firstline">Reports the status of dataflow WorkItems leased by a worker.</p>
<h3>Method Details</h3>
<div class="method">
    <code class="details" id="lease">lease(projectId, location, jobId, body, x__xgafv=None)</code>
  <pre>Leases a dataflow WorkItem to run.

Args:
  projectId: string, Identifies the project this worker belongs to. (required)
  location: string, The location which contains the WorkItem's job. (required)
  jobId: string, Identifies the workflow job this worker belongs to. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Request to lease WorkItems.
    "workItemTypes": [ # Filter for WorkItem type.
      "A String",
    ],
    "workerCapabilities": [ # Worker capabilities. WorkItems might be limited to workers with specific
        # capabilities.
      "A String",
    ],
    "requestedLeaseDuration": "A String", # The initial lease period.
    "workerId": "A String", # Identifies the worker leasing work -- typically the ID of the
        # virtual machine running the worker.
    "currentWorkerTime": "A String", # The current timestamp at the worker.
    "location": "A String", # The location which contains the WorkItem's job.
  }

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # Response to a request to lease WorkItems.
    "workItems": [ # A list of the leased WorkItems.
      { # WorkItem represents basic information about a WorkItem to be executed
          # in the cloud.
        "reportStatusInterval": "A String", # Recommended reporting interval.
        "leaseExpireTime": "A String", # Time when the lease on this Work will expire.
        "seqMapTask": { # Describes a particular function to invoke.
# Additional information for SeqMapTask WorkItems. 125 "inputs": [ # Information about each of the inputs. 126 { # Information about a side input of a DoFn or an input of a SeqDoFn. 127 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 128 # If more than one source, then the elements are taken from the 129 # sources, in the specified order if order matters. 130 # At least one source is required. 131 { # A source that records can be read and decoded from. 132 "codec": { # The codec to use to decode data read from the source. 133 "a_key": "", # Properties of the object. 134 }, 135 "baseSpecs": [ # While splitting, sources may specify the produced bundles 136 # as differences against another source, in order to save backend-side 137 # memory and allow bigger jobs. For details, see SourceSplitRequest. 138 # To support this use case, the full set of parameters of the source 139 # is logically obtained by taking the latest explicitly specified value 140 # of each parameter in the order: 141 # base_specs (later items win), spec (overrides anything in base_specs). 142 { 143 "a_key": "", # Properties of the object. 144 }, 145 ], 146 "spec": { # The source to read from, plus its parameters. 147 "a_key": "", # Properties of the object. 148 }, 149 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 150 # doesn't need splitting, and using SourceSplitRequest on it would 151 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 152 # 153 # E.g. a file splitter may set this to true when splitting a single file 154 # into a set of byte ranges of appropriate size, and set this 155 # to false when splitting a filepattern into individual files. 156 # However, for efficiency, a file splitter may decide to produce 157 # file subranges directly from the filepattern to avoid a splitting 158 # round-trip. 159 # 160 # See SourceSplitRequest for an overview of the splitting process. 161 # 162 # This field is meaningful only in the Source objects populated 163 # by the user (e.g. when filling in a DerivedSource). 164 # Source objects supplied by the framework to the user don't have 165 # this field populated. 166 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 167 # avoiding a SourceGetMetadataOperation roundtrip 168 # (see SourceOperationRequest). 169 # 170 # This field is meaningful only in the Source objects populated 171 # by the user (e.g. when filling in a DerivedSource). 172 # Source objects supplied by the framework to the user don't have 173 # this field populated. 174 # and tuning the pipeline, etc. 175 "infinite": True or False, # Specifies that the size of this source is known to be infinite 176 # (this is a streaming source). 177 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 178 # read from this source. This estimate is in terms of external storage 179 # size, before any decompression or other processing done by the reader. 180 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 181 # the (encoded) keys in lexicographically sorted order. 182 }, 183 }, 184 ], 185 "kind": { # How to interpret the source element(s) as a side input value. 186 "a_key": "", # Properties of the object. 
187 }, 188 "tag": "A String", # The id of the tag the user code will access this side input by; 189 # this should correspond to the tag of some MultiOutputInfo. 190 }, 191 ], 192 "outputInfos": [ # Information about each of the outputs. 193 { # Information about an output of a SeqMapTask. 194 "tag": "A String", # The id of the TupleTag the user code will tag the output value by. 195 "sink": { # A sink that records can be encoded and written to. # The sink to write the output value to. 196 "codec": { # The codec to use to encode data written to the sink. 197 "a_key": "", # Properties of the object. 198 }, 199 "spec": { # The sink to write to, plus its parameters. 200 "a_key": "", # Properties of the object. 201 }, 202 }, 203 }, 204 ], 205 "stageName": "A String", # System-defined name of the stage containing the SeqDo operation. 206 # Unique across the workflow. 207 "systemName": "A String", # System-defined name of the SeqDo operation. 208 # Unique across the workflow. 209 "userFn": { # The user function to invoke. 210 "a_key": "", # Properties of the object. 211 }, 212 "name": "A String", # The user-provided name of the SeqDo operation. 213 }, 214 "projectId": "A String", # Identifies the cloud project this WorkItem belongs to. 215 "sourceOperationTask": { # A work item that represents the different operations that can be # Additional information for source operation WorkItems. 216 # performed on a user-defined Source specification. 217 "getMetadata": { # A request to compute the SourceMetadata of a Source. # Information about a request to get metadata about a source. 218 "source": { # A source that records can be read and decoded from. # Specification of the source whose metadata should be computed. 219 "codec": { # The codec to use to decode data read from the source. 220 "a_key": "", # Properties of the object. 221 }, 222 "baseSpecs": [ # While splitting, sources may specify the produced bundles 223 # as differences against another source, in order to save backend-side 224 # memory and allow bigger jobs. For details, see SourceSplitRequest. 225 # To support this use case, the full set of parameters of the source 226 # is logically obtained by taking the latest explicitly specified value 227 # of each parameter in the order: 228 # base_specs (later items win), spec (overrides anything in base_specs). 229 { 230 "a_key": "", # Properties of the object. 231 }, 232 ], 233 "spec": { # The source to read from, plus its parameters. 234 "a_key": "", # Properties of the object. 235 }, 236 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 237 # doesn't need splitting, and using SourceSplitRequest on it would 238 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 239 # 240 # E.g. a file splitter may set this to true when splitting a single file 241 # into a set of byte ranges of appropriate size, and set this 242 # to false when splitting a filepattern into individual files. 243 # However, for efficiency, a file splitter may decide to produce 244 # file subranges directly from the filepattern to avoid a splitting 245 # round-trip. 246 # 247 # See SourceSplitRequest for an overview of the splitting process. 248 # 249 # This field is meaningful only in the Source objects populated 250 # by the user (e.g. when filling in a DerivedSource). 251 # Source objects supplied by the framework to the user don't have 252 # this field populated. 
253 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 254 # avoiding a SourceGetMetadataOperation roundtrip 255 # (see SourceOperationRequest). 256 # 257 # This field is meaningful only in the Source objects populated 258 # by the user (e.g. when filling in a DerivedSource). 259 # Source objects supplied by the framework to the user don't have 260 # this field populated. 261 # and tuning the pipeline, etc. 262 "infinite": True or False, # Specifies that the size of this source is known to be infinite 263 # (this is a streaming source). 264 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 265 # read from this source. This estimate is in terms of external storage 266 # size, before any decompression or other processing done by the reader. 267 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 268 # the (encoded) keys in lexicographically sorted order. 269 }, 270 }, 271 }, 272 "split": { # Represents the operation to split a high-level Source specification # Information about a request to split a source. 273 # into bundles (parts for parallel processing). 274 # 275 # At a high level, splitting of a source into bundles happens as follows: 276 # SourceSplitRequest is applied to the source. If it returns 277 # SOURCE_SPLIT_OUTCOME_USE_CURRENT, no further splitting happens and the source 278 # is used "as is". Otherwise, splitting is applied recursively to each 279 # produced DerivedSource. 280 # 281 # As an optimization, for any Source, if its does_not_need_splitting is 282 # true, the framework assumes that splitting this source would return 283 # SOURCE_SPLIT_OUTCOME_USE_CURRENT, and doesn't initiate a SourceSplitRequest. 284 # This applies both to the initial source being split and to bundles 285 # produced from it. 286 "source": { # A source that records can be read and decoded from. # Specification of the source to be split. 287 "codec": { # The codec to use to decode data read from the source. 288 "a_key": "", # Properties of the object. 289 }, 290 "baseSpecs": [ # While splitting, sources may specify the produced bundles 291 # as differences against another source, in order to save backend-side 292 # memory and allow bigger jobs. For details, see SourceSplitRequest. 293 # To support this use case, the full set of parameters of the source 294 # is logically obtained by taking the latest explicitly specified value 295 # of each parameter in the order: 296 # base_specs (later items win), spec (overrides anything in base_specs). 297 { 298 "a_key": "", # Properties of the object. 299 }, 300 ], 301 "spec": { # The source to read from, plus its parameters. 302 "a_key": "", # Properties of the object. 303 }, 304 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 305 # doesn't need splitting, and using SourceSplitRequest on it would 306 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 307 # 308 # E.g. a file splitter may set this to true when splitting a single file 309 # into a set of byte ranges of appropriate size, and set this 310 # to false when splitting a filepattern into individual files. 311 # However, for efficiency, a file splitter may decide to produce 312 # file subranges directly from the filepattern to avoid a splitting 313 # round-trip. 314 # 315 # See SourceSplitRequest for an overview of the splitting process. 
316 # 317 # This field is meaningful only in the Source objects populated 318 # by the user (e.g. when filling in a DerivedSource). 319 # Source objects supplied by the framework to the user don't have 320 # this field populated. 321 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 322 # avoiding a SourceGetMetadataOperation roundtrip 323 # (see SourceOperationRequest). 324 # 325 # This field is meaningful only in the Source objects populated 326 # by the user (e.g. when filling in a DerivedSource). 327 # Source objects supplied by the framework to the user don't have 328 # this field populated. 329 # and tuning the pipeline, etc. 330 "infinite": True or False, # Specifies that the size of this source is known to be infinite 331 # (this is a streaming source). 332 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 333 # read from this source. This estimate is in terms of external storage 334 # size, before any decompression or other processing done by the reader. 335 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 336 # the (encoded) keys in lexicographically sorted order. 337 }, 338 }, 339 "options": { # Hints for splitting a Source into bundles (parts for parallel # Hints for tuning the splitting process. 340 # processing) using SourceSplitRequest. 341 "desiredShardSizeBytes": "A String", # DEPRECATED in favor of desired_bundle_size_bytes. 342 "desiredBundleSizeBytes": "A String", # The source should be split into a set of bundles where the estimated size 343 # of each is approximately this many bytes. 344 }, 345 }, 346 }, 347 "initialReportIndex": "A String", # The initial index to use when reporting the status of the WorkItem. 348 "mapTask": { # MapTask consists of an ordered set of instructions, each of which # Additional information for MapTask WorkItems. 349 # describes one particular low-level operation for the worker to 350 # perform in order to accomplish the MapTask's WorkItem. 351 # 352 # Each instruction must appear in the list before any instructions which 353 # depends on its output. 354 "systemName": "A String", # System-defined name of this MapTask. 355 # Unique across the workflow. 356 "stageName": "A String", # System-defined name of the stage containing this MapTask. 357 # Unique across the workflow. 358 "instructions": [ # The instructions in the MapTask. 359 { # Describes a particular operation comprising a MapTask. 360 "name": "A String", # User-provided name of this operation. 361 "read": { # An instruction that reads records. # Additional information for Read instructions. 362 # Takes no inputs, produces one output. 363 "source": { # A source that records can be read and decoded from. # The source to read from. 364 "codec": { # The codec to use to decode data read from the source. 365 "a_key": "", # Properties of the object. 366 }, 367 "baseSpecs": [ # While splitting, sources may specify the produced bundles 368 # as differences against another source, in order to save backend-side 369 # memory and allow bigger jobs. For details, see SourceSplitRequest. 370 # To support this use case, the full set of parameters of the source 371 # is logically obtained by taking the latest explicitly specified value 372 # of each parameter in the order: 373 # base_specs (later items win), spec (overrides anything in base_specs). 374 { 375 "a_key": "", # Properties of the object. 
376 }, 377 ], 378 "spec": { # The source to read from, plus its parameters. 379 "a_key": "", # Properties of the object. 380 }, 381 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 382 # doesn't need splitting, and using SourceSplitRequest on it would 383 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 384 # 385 # E.g. a file splitter may set this to true when splitting a single file 386 # into a set of byte ranges of appropriate size, and set this 387 # to false when splitting a filepattern into individual files. 388 # However, for efficiency, a file splitter may decide to produce 389 # file subranges directly from the filepattern to avoid a splitting 390 # round-trip. 391 # 392 # See SourceSplitRequest for an overview of the splitting process. 393 # 394 # This field is meaningful only in the Source objects populated 395 # by the user (e.g. when filling in a DerivedSource). 396 # Source objects supplied by the framework to the user don't have 397 # this field populated. 398 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 399 # avoiding a SourceGetMetadataOperation roundtrip 400 # (see SourceOperationRequest). 401 # 402 # This field is meaningful only in the Source objects populated 403 # by the user (e.g. when filling in a DerivedSource). 404 # Source objects supplied by the framework to the user don't have 405 # this field populated. 406 # and tuning the pipeline, etc. 407 "infinite": True or False, # Specifies that the size of this source is known to be infinite 408 # (this is a streaming source). 409 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 410 # read from this source. This estimate is in terms of external storage 411 # size, before any decompression or other processing done by the reader. 412 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 413 # the (encoded) keys in lexicographically sorted order. 414 }, 415 }, 416 }, 417 "outputs": [ # Describes the outputs of the instruction. 418 { # An output of an instruction. 419 "name": "A String", # The user-provided name of this output. 420 "onlyCountKeyBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions 421 # should only report the key size. 422 "codec": { # The codec to use to encode data being written via this output. 423 "a_key": "", # Properties of the object. 424 }, 425 "systemName": "A String", # System-defined name of this output. 426 # Unique across the workflow. 427 "originalName": "A String", # System-defined name for this output in the original workflow graph. 428 # Outputs that do not contribute to an original instruction do not set this. 429 "onlyCountValueBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions 430 # should only report the value size. 431 }, 432 ], 433 "partialGroupByKey": { # An instruction that does a partial group-by-key. # Additional information for PartialGroupByKey instructions. 434 # One input and one output. 435 "sideInputs": [ # Zero or more side inputs. 436 { # Information about a side input of a DoFn or an input of a SeqDoFn. 437 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 438 # If more than one source, then the elements are taken from the 439 # sources, in the specified order if order matters. 
440 # At least one source is required. 441 { # A source that records can be read and decoded from. 442 "codec": { # The codec to use to decode data read from the source. 443 "a_key": "", # Properties of the object. 444 }, 445 "baseSpecs": [ # While splitting, sources may specify the produced bundles 446 # as differences against another source, in order to save backend-side 447 # memory and allow bigger jobs. For details, see SourceSplitRequest. 448 # To support this use case, the full set of parameters of the source 449 # is logically obtained by taking the latest explicitly specified value 450 # of each parameter in the order: 451 # base_specs (later items win), spec (overrides anything in base_specs). 452 { 453 "a_key": "", # Properties of the object. 454 }, 455 ], 456 "spec": { # The source to read from, plus its parameters. 457 "a_key": "", # Properties of the object. 458 }, 459 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 460 # doesn't need splitting, and using SourceSplitRequest on it would 461 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 462 # 463 # E.g. a file splitter may set this to true when splitting a single file 464 # into a set of byte ranges of appropriate size, and set this 465 # to false when splitting a filepattern into individual files. 466 # However, for efficiency, a file splitter may decide to produce 467 # file subranges directly from the filepattern to avoid a splitting 468 # round-trip. 469 # 470 # See SourceSplitRequest for an overview of the splitting process. 471 # 472 # This field is meaningful only in the Source objects populated 473 # by the user (e.g. when filling in a DerivedSource). 474 # Source objects supplied by the framework to the user don't have 475 # this field populated. 476 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 477 # avoiding a SourceGetMetadataOperation roundtrip 478 # (see SourceOperationRequest). 479 # 480 # This field is meaningful only in the Source objects populated 481 # by the user (e.g. when filling in a DerivedSource). 482 # Source objects supplied by the framework to the user don't have 483 # this field populated. 484 # and tuning the pipeline, etc. 485 "infinite": True or False, # Specifies that the size of this source is known to be infinite 486 # (this is a streaming source). 487 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 488 # read from this source. This estimate is in terms of external storage 489 # size, before any decompression or other processing done by the reader. 490 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 491 # the (encoded) keys in lexicographically sorted order. 492 }, 493 }, 494 ], 495 "kind": { # How to interpret the source element(s) as a side input value. 496 "a_key": "", # Properties of the object. 497 }, 498 "tag": "A String", # The id of the tag the user code will access this side input by; 499 # this should correspond to the tag of some MultiOutputInfo. 500 }, 501 ], 502 "inputElementCodec": { # The codec to use for interpreting an element in the input PTable. 503 "a_key": "", # Properties of the object. 504 }, 505 "originalCombineValuesStepName": "A String", # If this instruction includes a combining function, this is the name of the 506 # CombineValues instruction lifted into this instruction. 
507 "valueCombiningFn": { # The value combining function to invoke. 508 "a_key": "", # Properties of the object. 509 }, 510 "input": { # An input of an instruction, as a reference to an output of a # Describes the input to the partial group-by-key instruction. 511 # producer instruction. 512 "outputNum": 42, # The output index (origin zero) within the producer. 513 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 514 # the output to be consumed by this input. This index is relative 515 # to the list of instructions in this input's instruction's 516 # containing MapTask. 517 }, 518 "originalCombineValuesInputStoreName": "A String", # If this instruction includes a combining function this is the name of the 519 # intermediate store between the GBK and the CombineValues. 520 }, 521 "write": { # An instruction that writes records. # Additional information for Write instructions. 522 # Takes one input, produces no outputs. 523 "input": { # An input of an instruction, as a reference to an output of a # The input. 524 # producer instruction. 525 "outputNum": 42, # The output index (origin zero) within the producer. 526 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 527 # the output to be consumed by this input. This index is relative 528 # to the list of instructions in this input's instruction's 529 # containing MapTask. 530 }, 531 "sink": { # A sink that records can be encoded and written to. # The sink to write to. 532 "codec": { # The codec to use to encode data written to the sink. 533 "a_key": "", # Properties of the object. 534 }, 535 "spec": { # The sink to write to, plus its parameters. 536 "a_key": "", # Properties of the object. 537 }, 538 }, 539 }, 540 "systemName": "A String", # System-defined name of this operation. 541 # Unique across the workflow. 542 "flatten": { # An instruction that copies its inputs (zero or more) to its (single) output. # Additional information for Flatten instructions. 543 "inputs": [ # Describes the inputs to the flatten instruction. 544 { # An input of an instruction, as a reference to an output of a 545 # producer instruction. 546 "outputNum": 42, # The output index (origin zero) within the producer. 547 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 548 # the output to be consumed by this input. This index is relative 549 # to the list of instructions in this input's instruction's 550 # containing MapTask. 551 }, 552 ], 553 }, 554 "originalName": "A String", # System-defined name for the operation in the original workflow graph. 555 "parDo": { # An instruction that does a ParDo operation. # Additional information for ParDo instructions. 556 # Takes one main input and zero or more side inputs, and produces 557 # zero or more outputs. 558 # Runs user code. 559 "sideInputs": [ # Zero or more side inputs. 560 { # Information about a side input of a DoFn or an input of a SeqDoFn. 561 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 562 # If more than one source, then the elements are taken from the 563 # sources, in the specified order if order matters. 564 # At least one source is required. 565 { # A source that records can be read and decoded from. 566 "codec": { # The codec to use to decode data read from the source. 567 "a_key": "", # Properties of the object. 
568 }, 569 "baseSpecs": [ # While splitting, sources may specify the produced bundles 570 # as differences against another source, in order to save backend-side 571 # memory and allow bigger jobs. For details, see SourceSplitRequest. 572 # To support this use case, the full set of parameters of the source 573 # is logically obtained by taking the latest explicitly specified value 574 # of each parameter in the order: 575 # base_specs (later items win), spec (overrides anything in base_specs). 576 { 577 "a_key": "", # Properties of the object. 578 }, 579 ], 580 "spec": { # The source to read from, plus its parameters. 581 "a_key": "", # Properties of the object. 582 }, 583 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 584 # doesn't need splitting, and using SourceSplitRequest on it would 585 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 586 # 587 # E.g. a file splitter may set this to true when splitting a single file 588 # into a set of byte ranges of appropriate size, and set this 589 # to false when splitting a filepattern into individual files. 590 # However, for efficiency, a file splitter may decide to produce 591 # file subranges directly from the filepattern to avoid a splitting 592 # round-trip. 593 # 594 # See SourceSplitRequest for an overview of the splitting process. 595 # 596 # This field is meaningful only in the Source objects populated 597 # by the user (e.g. when filling in a DerivedSource). 598 # Source objects supplied by the framework to the user don't have 599 # this field populated. 600 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 601 # avoiding a SourceGetMetadataOperation roundtrip 602 # (see SourceOperationRequest). 603 # 604 # This field is meaningful only in the Source objects populated 605 # by the user (e.g. when filling in a DerivedSource). 606 # Source objects supplied by the framework to the user don't have 607 # this field populated. 608 # and tuning the pipeline, etc. 609 "infinite": True or False, # Specifies that the size of this source is known to be infinite 610 # (this is a streaming source). 611 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 612 # read from this source. This estimate is in terms of external storage 613 # size, before any decompression or other processing done by the reader. 614 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 615 # the (encoded) keys in lexicographically sorted order. 616 }, 617 }, 618 ], 619 "kind": { # How to interpret the source element(s) as a side input value. 620 "a_key": "", # Properties of the object. 621 }, 622 "tag": "A String", # The id of the tag the user code will access this side input by; 623 # this should correspond to the tag of some MultiOutputInfo. 624 }, 625 ], 626 "input": { # An input of an instruction, as a reference to an output of a # The input. 627 # producer instruction. 628 "outputNum": 42, # The output index (origin zero) within the producer. 629 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 630 # the output to be consumed by this input. This index is relative 631 # to the list of instructions in this input's instruction's 632 # containing MapTask. 633 }, 634 "multiOutputInfos": [ # Information about each of the outputs, if user_fn is a MultiDoFn. 
635 { # Information about an output of a multi-output DoFn. 636 "tag": "A String", # The id of the tag the user code will emit to this output by; this 637 # should correspond to the tag of some SideInputInfo. 638 }, 639 ], 640 "numOutputs": 42, # The number of outputs. 641 "userFn": { # The user function to invoke. 642 "a_key": "", # Properties of the object. 643 }, 644 }, 645 }, 646 ], 647 }, 648 "jobId": "A String", # Identifies the workflow job this WorkItem belongs to. 649 "configuration": "A String", # Work item-specific configuration as an opaque blob. 650 "streamingSetupTask": { # A task which initializes part of a streaming Dataflow job. # Additional information for StreamingSetupTask WorkItems. 651 "workerHarnessPort": 42, # The TCP port used by the worker to communicate with the Dataflow 652 # worker harness. 653 "drain": True or False, # The user has requested drain. 654 "streamingComputationTopology": { # Global topology of the streaming Dataflow job, including all # The global topology of the streaming Dataflow job. 655 # computations and their sharded locations. 656 "computations": [ # The computations associated with a streaming Dataflow job. 657 { # All configuration data for a particular Computation. 658 "inputs": [ # The inputs to the computation. 659 { # Describes a stream of data, either as input to be processed or as 660 # output of a streaming Dataflow job. 661 "streamingStageLocation": { # Identifies the location of a streaming computation stage, for # The stream is part of another computation within the current 662 # streaming Dataflow job. 663 # stage-to-stage communication. 664 "streamId": "A String", # Identifies the particular stream within the streaming Dataflow 665 # job. 666 }, 667 "pubsubLocation": { # Identifies a pubsub location to use for transferring data into or # The stream is a pubsub stream. 668 # out of a streaming Dataflow job. 669 "idLabel": "A String", # If set, contains a pubsub label from which to extract record ids. 670 # If left empty, record deduplication will be strictly best effort. 671 "timestampLabel": "A String", # If set, contains a pubsub label from which to extract record timestamps. 672 # If left empty, record timestamps will be generated upon arrival. 673 "dropLateData": True or False, # Indicates whether the pipeline allows late-arriving data. 674 "topic": "A String", # A pubsub topic, in the form of 675 # "pubsub.googleapis.com/topics/<project-id>/<topic-name>" 676 "trackingSubscription": "A String", # If set, specifies the pubsub subscription that will be used for tracking 677 # custom time timestamps for watermark estimation. 678 "withAttributes": True or False, # If true, then the client has requested to get pubsub attributes. 679 "subscription": "A String", # A pubsub subscription, in the form of 680 # "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>" 681 }, 682 "customSourceLocation": { # Identifies the location of a custom souce. # The stream is a custom source. 683 "stateful": True or False, # Whether this source is stateful. 684 }, 685 "sideInputLocation": { # Identifies the location of a streaming side input. # The stream is a streaming side input. 686 "stateFamily": "A String", # Identifies the state family where this side input is stored. 687 "tag": "A String", # Identifies the particular side input within the streaming Dataflow job. 688 }, 689 }, 690 ], 691 "outputs": [ # The outputs from the computation. 
692 { # Describes a stream of data, either as input to be processed or as 693 # output of a streaming Dataflow job. 694 "streamingStageLocation": { # Identifies the location of a streaming computation stage, for # The stream is part of another computation within the current 695 # streaming Dataflow job. 696 # stage-to-stage communication. 697 "streamId": "A String", # Identifies the particular stream within the streaming Dataflow 698 # job. 699 }, 700 "pubsubLocation": { # Identifies a pubsub location to use for transferring data into or # The stream is a pubsub stream. 701 # out of a streaming Dataflow job. 702 "idLabel": "A String", # If set, contains a pubsub label from which to extract record ids. 703 # If left empty, record deduplication will be strictly best effort. 704 "timestampLabel": "A String", # If set, contains a pubsub label from which to extract record timestamps. 705 # If left empty, record timestamps will be generated upon arrival. 706 "dropLateData": True or False, # Indicates whether the pipeline allows late-arriving data. 707 "topic": "A String", # A pubsub topic, in the form of 708 # "pubsub.googleapis.com/topics/<project-id>/<topic-name>" 709 "trackingSubscription": "A String", # If set, specifies the pubsub subscription that will be used for tracking 710 # custom time timestamps for watermark estimation. 711 "withAttributes": True or False, # If true, then the client has requested to get pubsub attributes. 712 "subscription": "A String", # A pubsub subscription, in the form of 713 # "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>" 714 }, 715 "customSourceLocation": { # Identifies the location of a custom souce. # The stream is a custom source. 716 "stateful": True or False, # Whether this source is stateful. 717 }, 718 "sideInputLocation": { # Identifies the location of a streaming side input. # The stream is a streaming side input. 719 "stateFamily": "A String", # Identifies the state family where this side input is stored. 720 "tag": "A String", # Identifies the particular side input within the streaming Dataflow job. 721 }, 722 }, 723 ], 724 "keyRanges": [ # The key ranges processed by the computation. 725 { # Location information for a specific key-range of a sharded computation. 726 # Currently we only support UTF-8 character splits to simplify encoding into 727 # JSON. 728 "deprecatedPersistentDirectory": "A String", # DEPRECATED. The location of the persistent state for this range, as a 729 # persistent directory in the worker local filesystem. 730 "start": "A String", # The start (inclusive) of the key range. 731 "deliveryEndpoint": "A String", # The physical location of this range assignment to be used for 732 # streaming computation cross-worker message delivery. 733 "end": "A String", # The end (exclusive) of the key range. 734 "dataDisk": "A String", # The name of the data disk where data for this range is stored. 735 # This name is local to the Google Cloud Platform project and uniquely 736 # identifies the disk within that project, for example 737 # "myproject-1014-104817-4c2-harness-0-disk-1". 738 }, 739 ], 740 "computationId": "A String", # The ID of the computation. 741 "systemStageName": "A String", # The system stage name. 742 "stateFamilies": [ # The state family values. 743 { # State family configuration. 744 "stateFamily": "A String", # The state family value. 745 "isRead": True or False, # If true, this family corresponds to a read operation. 
746 }, 747 ], 748 }, 749 ], 750 "dataDiskAssignments": [ # The disks assigned to a streaming Dataflow job. 751 { # Data disk assignment for a given VM instance. 752 "vmInstance": "A String", # VM instance name the data disks mounted to, for example 753 # "myproject-1014-104817-4c2-harness-0". 754 "dataDisks": [ # Mounted data disks. The order is important a data disk's 0-based index in 755 # this list defines which persistent directory the disk is mounted to, for 756 # example the list of { "myproject-1014-104817-4c2-harness-0-disk-0" }, 757 # { "myproject-1014-104817-4c2-harness-0-disk-1" }. 758 "A String", 759 ], 760 }, 761 ], 762 "forwardingKeyBits": 42, # The size (in bits) of keys that will be assigned to source messages. 763 "userStageToComputationNameMap": { # Maps user stage names to stable computation names. 764 "a_key": "A String", 765 }, 766 "persistentStateVersion": 42, # Version number for persistent state. 767 }, 768 "receiveWorkPort": 42, # The TCP port on which the worker should listen for messages from 769 # other streaming computation workers. 770 }, 771 "id": "A String", # Identifies this WorkItem. 772 "streamingConfigTask": { # A task that carries configuration information for streaming computations. # Additional information for StreamingConfigTask WorkItems. 773 "userStepToStateFamilyNameMap": { # Map from user step names to state families. 774 "a_key": "A String", 775 }, 776 "windmillServicePort": "A String", # If present, the worker must use this port to communicate with Windmill 777 # Service dispatchers. Only applicable when windmill_service_endpoint is 778 # specified. 779 "streamingComputationConfigs": [ # Set of computation configuration information. 780 { # Configuration information for a single streaming computation. 781 "computationId": "A String", # Unique identifier for this computation. 782 "systemName": "A String", # System defined name for this computation. 783 "stageName": "A String", # Stage name of this computation. 784 "instructions": [ # Instructions that comprise the computation. 785 { # Describes a particular operation comprising a MapTask. 786 "name": "A String", # User-provided name of this operation. 787 "read": { # An instruction that reads records. # Additional information for Read instructions. 788 # Takes no inputs, produces one output. 789 "source": { # A source that records can be read and decoded from. # The source to read from. 790 "codec": { # The codec to use to decode data read from the source. 791 "a_key": "", # Properties of the object. 792 }, 793 "baseSpecs": [ # While splitting, sources may specify the produced bundles 794 # as differences against another source, in order to save backend-side 795 # memory and allow bigger jobs. For details, see SourceSplitRequest. 796 # To support this use case, the full set of parameters of the source 797 # is logically obtained by taking the latest explicitly specified value 798 # of each parameter in the order: 799 # base_specs (later items win), spec (overrides anything in base_specs). 800 { 801 "a_key": "", # Properties of the object. 802 }, 803 ], 804 "spec": { # The source to read from, plus its parameters. 805 "a_key": "", # Properties of the object. 806 }, 807 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 808 # doesn't need splitting, and using SourceSplitRequest on it would 809 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 810 # 811 # E.g. 
a file splitter may set this to true when splitting a single file 812 # into a set of byte ranges of appropriate size, and set this 813 # to false when splitting a filepattern into individual files. 814 # However, for efficiency, a file splitter may decide to produce 815 # file subranges directly from the filepattern to avoid a splitting 816 # round-trip. 817 # 818 # See SourceSplitRequest for an overview of the splitting process. 819 # 820 # This field is meaningful only in the Source objects populated 821 # by the user (e.g. when filling in a DerivedSource). 822 # Source objects supplied by the framework to the user don't have 823 # this field populated. 824 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 825 # avoiding a SourceGetMetadataOperation roundtrip 826 # (see SourceOperationRequest). 827 # 828 # This field is meaningful only in the Source objects populated 829 # by the user (e.g. when filling in a DerivedSource). 830 # Source objects supplied by the framework to the user don't have 831 # this field populated. 832 # and tuning the pipeline, etc. 833 "infinite": True or False, # Specifies that the size of this source is known to be infinite 834 # (this is a streaming source). 835 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 836 # read from this source. This estimate is in terms of external storage 837 # size, before any decompression or other processing done by the reader. 838 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 839 # the (encoded) keys in lexicographically sorted order. 840 }, 841 }, 842 }, 843 "outputs": [ # Describes the outputs of the instruction. 844 { # An output of an instruction. 845 "name": "A String", # The user-provided name of this output. 846 "onlyCountKeyBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions 847 # should only report the key size. 848 "codec": { # The codec to use to encode data being written via this output. 849 "a_key": "", # Properties of the object. 850 }, 851 "systemName": "A String", # System-defined name of this output. 852 # Unique across the workflow. 853 "originalName": "A String", # System-defined name for this output in the original workflow graph. 854 # Outputs that do not contribute to an original instruction do not set this. 855 "onlyCountValueBytes": True or False, # For system-generated byte and mean byte metrics, certain instructions 856 # should only report the value size. 857 }, 858 ], 859 "partialGroupByKey": { # An instruction that does a partial group-by-key. # Additional information for PartialGroupByKey instructions. 860 # One input and one output. 861 "sideInputs": [ # Zero or more side inputs. 862 { # Information about a side input of a DoFn or an input of a SeqDoFn. 863 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 864 # If more than one source, then the elements are taken from the 865 # sources, in the specified order if order matters. 866 # At least one source is required. 867 { # A source that records can be read and decoded from. 868 "codec": { # The codec to use to decode data read from the source. 869 "a_key": "", # Properties of the object. 
870 }, 871 "baseSpecs": [ # While splitting, sources may specify the produced bundles 872 # as differences against another source, in order to save backend-side 873 # memory and allow bigger jobs. For details, see SourceSplitRequest. 874 # To support this use case, the full set of parameters of the source 875 # is logically obtained by taking the latest explicitly specified value 876 # of each parameter in the order: 877 # base_specs (later items win), spec (overrides anything in base_specs). 878 { 879 "a_key": "", # Properties of the object. 880 }, 881 ], 882 "spec": { # The source to read from, plus its parameters. 883 "a_key": "", # Properties of the object. 884 }, 885 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 886 # doesn't need splitting, and using SourceSplitRequest on it would 887 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 888 # 889 # E.g. a file splitter may set this to true when splitting a single file 890 # into a set of byte ranges of appropriate size, and set this 891 # to false when splitting a filepattern into individual files. 892 # However, for efficiency, a file splitter may decide to produce 893 # file subranges directly from the filepattern to avoid a splitting 894 # round-trip. 895 # 896 # See SourceSplitRequest for an overview of the splitting process. 897 # 898 # This field is meaningful only in the Source objects populated 899 # by the user (e.g. when filling in a DerivedSource). 900 # Source objects supplied by the framework to the user don't have 901 # this field populated. 902 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 903 # avoiding a SourceGetMetadataOperation roundtrip 904 # (see SourceOperationRequest). 905 # 906 # This field is meaningful only in the Source objects populated 907 # by the user (e.g. when filling in a DerivedSource). 908 # Source objects supplied by the framework to the user don't have 909 # this field populated. 910 # and tuning the pipeline, etc. 911 "infinite": True or False, # Specifies that the size of this source is known to be infinite 912 # (this is a streaming source). 913 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 914 # read from this source. This estimate is in terms of external storage 915 # size, before any decompression or other processing done by the reader. 916 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 917 # the (encoded) keys in lexicographically sorted order. 918 }, 919 }, 920 ], 921 "kind": { # How to interpret the source element(s) as a side input value. 922 "a_key": "", # Properties of the object. 923 }, 924 "tag": "A String", # The id of the tag the user code will access this side input by; 925 # this should correspond to the tag of some MultiOutputInfo. 926 }, 927 ], 928 "inputElementCodec": { # The codec to use for interpreting an element in the input PTable. 929 "a_key": "", # Properties of the object. 930 }, 931 "originalCombineValuesStepName": "A String", # If this instruction includes a combining function, this is the name of the 932 # CombineValues instruction lifted into this instruction. 933 "valueCombiningFn": { # The value combining function to invoke. 934 "a_key": "", # Properties of the object. 
935 }, 936 "input": { # An input of an instruction, as a reference to an output of a # Describes the input to the partial group-by-key instruction. 937 # producer instruction. 938 "outputNum": 42, # The output index (origin zero) within the producer. 939 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 940 # the output to be consumed by this input. This index is relative 941 # to the list of instructions in this input's instruction's 942 # containing MapTask. 943 }, 944 "originalCombineValuesInputStoreName": "A String", # If this instruction includes a combining function this is the name of the 945 # intermediate store between the GBK and the CombineValues. 946 }, 947 "write": { # An instruction that writes records. # Additional information for Write instructions. 948 # Takes one input, produces no outputs. 949 "input": { # An input of an instruction, as a reference to an output of a # The input. 950 # producer instruction. 951 "outputNum": 42, # The output index (origin zero) within the producer. 952 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 953 # the output to be consumed by this input. This index is relative 954 # to the list of instructions in this input's instruction's 955 # containing MapTask. 956 }, 957 "sink": { # A sink that records can be encoded and written to. # The sink to write to. 958 "codec": { # The codec to use to encode data written to the sink. 959 "a_key": "", # Properties of the object. 960 }, 961 "spec": { # The sink to write to, plus its parameters. 962 "a_key": "", # Properties of the object. 963 }, 964 }, 965 }, 966 "systemName": "A String", # System-defined name of this operation. 967 # Unique across the workflow. 968 "flatten": { # An instruction that copies its inputs (zero or more) to its (single) output. # Additional information for Flatten instructions. 969 "inputs": [ # Describes the inputs to the flatten instruction. 970 { # An input of an instruction, as a reference to an output of a 971 # producer instruction. 972 "outputNum": 42, # The output index (origin zero) within the producer. 973 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 974 # the output to be consumed by this input. This index is relative 975 # to the list of instructions in this input's instruction's 976 # containing MapTask. 977 }, 978 ], 979 }, 980 "originalName": "A String", # System-defined name for the operation in the original workflow graph. 981 "parDo": { # An instruction that does a ParDo operation. # Additional information for ParDo instructions. 982 # Takes one main input and zero or more side inputs, and produces 983 # zero or more outputs. 984 # Runs user code. 985 "sideInputs": [ # Zero or more side inputs. 986 { # Information about a side input of a DoFn or an input of a SeqDoFn. 987 "sources": [ # The source(s) to read element(s) from to get the value of this side input. 988 # If more than one source, then the elements are taken from the 989 # sources, in the specified order if order matters. 990 # At least one source is required. 991 { # A source that records can be read and decoded from. 992 "codec": { # The codec to use to decode data read from the source. 993 "a_key": "", # Properties of the object. 994 }, 995 "baseSpecs": [ # While splitting, sources may specify the produced bundles 996 # as differences against another source, in order to save backend-side 997 # memory and allow bigger jobs. 
For details, see SourceSplitRequest. 998 # To support this use case, the full set of parameters of the source 999 # is logically obtained by taking the latest explicitly specified value 1000 # of each parameter in the order: 1001 # base_specs (later items win), spec (overrides anything in base_specs). 1002 { 1003 "a_key": "", # Properties of the object. 1004 }, 1005 ], 1006 "spec": { # The source to read from, plus its parameters. 1007 "a_key": "", # Properties of the object. 1008 }, 1009 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1010 # doesn't need splitting, and using SourceSplitRequest on it would 1011 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1012 # 1013 # E.g. a file splitter may set this to true when splitting a single file 1014 # into a set of byte ranges of appropriate size, and set this 1015 # to false when splitting a filepattern into individual files. 1016 # However, for efficiency, a file splitter may decide to produce 1017 # file subranges directly from the filepattern to avoid a splitting 1018 # round-trip. 1019 # 1020 # See SourceSplitRequest for an overview of the splitting process. 1021 # 1022 # This field is meaningful only in the Source objects populated 1023 # by the user (e.g. when filling in a DerivedSource). 1024 # Source objects supplied by the framework to the user don't have 1025 # this field populated. 1026 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1027 # avoiding a SourceGetMetadataOperation roundtrip 1028 # (see SourceOperationRequest). 1029 # 1030 # This field is meaningful only in the Source objects populated 1031 # by the user (e.g. when filling in a DerivedSource). 1032 # Source objects supplied by the framework to the user don't have 1033 # this field populated. 1034 # and tuning the pipeline, etc. 1035 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1036 # (this is a streaming source). 1037 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1038 # read from this source. This estimate is in terms of external storage 1039 # size, before any decompression or other processing done by the reader. 1040 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1041 # the (encoded) keys in lexicographically sorted order. 1042 }, 1043 }, 1044 ], 1045 "kind": { # How to interpret the source element(s) as a side input value. 1046 "a_key": "", # Properties of the object. 1047 }, 1048 "tag": "A String", # The id of the tag the user code will access this side input by; 1049 # this should correspond to the tag of some MultiOutputInfo. 1050 }, 1051 ], 1052 "input": { # An input of an instruction, as a reference to an output of a # The input. 1053 # producer instruction. 1054 "outputNum": 42, # The output index (origin zero) within the producer. 1055 "producerInstructionIndex": 42, # The index (origin zero) of the parallel instruction that produces 1056 # the output to be consumed by this input. This index is relative 1057 # to the list of instructions in this input's instruction's 1058 # containing MapTask. 1059 }, 1060 "multiOutputInfos": [ # Information about each of the outputs, if user_fn is a MultiDoFn. 1061 { # Information about an output of a multi-output DoFn. 
1062 "tag": "A String", # The id of the tag the user code will emit to this output by; this 1063 # should correspond to the tag of some SideInputInfo. 1064 }, 1065 ], 1066 "numOutputs": 42, # The number of outputs. 1067 "userFn": { # The user function to invoke. 1068 "a_key": "", # Properties of the object. 1069 }, 1070 }, 1071 }, 1072 ], 1073 }, 1074 ], 1075 "windmillServiceEndpoint": "A String", # If present, the worker must use this endpoint to communicate with Windmill 1076 # Service dispatchers, otherwise the worker must continue to use whatever 1077 # endpoint it had been using. 1078 }, 1079 "packages": [ # Any required packages that need to be fetched in order to execute 1080 # this WorkItem. 1081 { # The packages that must be installed in order for a worker to run the 1082 # steps of the Cloud Dataflow job that will be assigned to its worker 1083 # pool. 1084 # 1085 # This is the mechanism by which the Cloud Dataflow SDK causes code to 1086 # be loaded onto the workers. For example, the Cloud Dataflow Java SDK 1087 # might use this to install jars containing the user's code and all of the 1088 # various dependencies (libraries, data files, etc.) required in order 1089 # for that code to run. 1090 "location": "A String", # The resource to read the package from. The supported resource type is: 1091 # 1092 # Google Cloud Storage: 1093 # 1094 # storage.googleapis.com/{bucket} 1095 # bucket.storage.googleapis.com/ 1096 "name": "A String", # The name of the package. 1097 }, 1098 ], 1099 "shellTask": { # A task which consists of a shell command for the worker to execute. # Additional information for ShellTask WorkItems. 1100 "command": "A String", # The shell command to run. 1101 "exitCode": 42, # Exit code for the task. 1102 }, 1103 "streamingComputationTask": { # A task which describes what action should be performed for the specified # Additional information for StreamingComputationTask WorkItems. 1104 # streaming computation ranges. 1105 "taskType": "A String", # A type of streaming computation task. 1106 "computationRanges": [ # Contains ranges of a streaming computation this task should apply to. 1107 { # Describes full or partial data disk assignment information of the computation 1108 # ranges. 1109 "computationId": "A String", # The ID of the computation. 1110 "rangeAssignments": [ # Data disk assignments for ranges from this computation. 1111 { # Data disk assignment information for a specific key-range of a sharded 1112 # computation. 1113 # Currently we only support UTF-8 character splits to simplify encoding into 1114 # JSON. 1115 "start": "A String", # The start (inclusive) of the key range. 1116 "end": "A String", # The end (exclusive) of the key range. 1117 "dataDisk": "A String", # The name of the data disk where data for this range is stored. 1118 # This name is local to the Google Cloud Platform project and uniquely 1119 # identifies the disk within that project, for example 1120 # "myproject-1014-104817-4c2-harness-0-disk-1". 1121 }, 1122 ], 1123 }, 1124 ], 1125 "dataDisks": [ # Describes the set of data disks this task should apply to. 1126 { # Describes mounted data disk. 1127 "dataDisk": "A String", # The name of the data disk. 1128 # This name is local to the Google Cloud Platform project and uniquely 1129 # identifies the disk within that project, for example 1130 # "myproject-1014-104817-4c2-harness-0-disk-1". 
          },
        ],
      },
    },
  ],
}</pre>
</div>

<div class="method">
    <code class="details" id="reportStatus">reportStatus(projectId, location, jobId, body, x__xgafv=None)</code>
  <pre>Reports the status of dataflow WorkItems leased by a worker.

Args:
  projectId: string, The project which owns the WorkItem's job. (required)
  location: string, The location which contains the WorkItem's job. (required)
  jobId: string, The job which the WorkItem is part of. (required)
  body: object, The request body. (required)
    The object takes the form of:

{ # Request to report the status of WorkItems.
    "workerId": "A String", # The ID of the worker reporting the WorkItem status. If this
        # does not match the ID of the worker which the Dataflow service
        # believes currently has the lease on the WorkItem, the report
        # will be dropped (with an error response).
    "currentWorkerTime": "A String", # The current timestamp at the worker.
    "workItemStatuses": [ # The order is unimportant, except that the order of the
        # WorkItemServiceState messages in the ReportWorkItemStatusResponse
        # corresponds to the order of WorkItemStatus messages here.
      { # Conveys a worker's progress through the work described by a WorkItem.
        "reportIndex": "A String", # The report index. When a WorkItem is leased, the lease will
            # contain an initial report index. When a WorkItem's status is
            # reported to the system, the report should be sent with
            # that report index, and the response will contain the index the
            # worker should use for the next report. Reports received with
            # unexpected index values will be rejected by the service.
            #
            # In order to preserve idempotency, the worker should not alter the
            # contents of a report, even if the worker must submit the same
            # report multiple times before getting back a response. The worker
            # should not submit a subsequent report until the response for the
            # previous report has been received from the service.
        "errors": [ # Specifies errors which occurred during processing. If errors are
            # provided, and completed = true, then the WorkItem is considered
            # to have failed.
          { # The `Status` type defines a logical error model that is suitable for different
              # programming environments, including REST APIs and RPC APIs. It is used by
              # [gRPC](https://github.com/grpc). The error model is designed to be:
              #
              # - Simple to use and understand for most users
              # - Flexible enough to meet unexpected needs
              #
              # # Overview
              #
              # The `Status` message contains three pieces of data: error code, error message,
              # and error details. The error code should be an enum value of
              # google.rpc.Code, but it may accept additional error codes if needed. The
              # error message should be a developer-facing English message that helps
              # developers *understand* and *resolve* the error. If a localized user-facing
              # error message is needed, put the localized message in the error details or
              # localize it in the client. The optional error details may contain arbitrary
              # information about the error. There is a predefined set of error detail types
              # in the package `google.rpc` that can be used for common error conditions.
1193 # 1194 # # Language mapping 1195 # 1196 # The `Status` message is the logical representation of the error model, but it 1197 # is not necessarily the actual wire format. When the `Status` message is 1198 # exposed in different client libraries and different wire protocols, it can be 1199 # mapped differently. For example, it will likely be mapped to some exceptions 1200 # in Java, but more likely mapped to some error codes in C. 1201 # 1202 # # Other uses 1203 # 1204 # The error model and the `Status` message can be used in a variety of 1205 # environments, either with or without APIs, to provide a 1206 # consistent developer experience across different environments. 1207 # 1208 # Example uses of this error model include: 1209 # 1210 # - Partial errors. If a service needs to return partial errors to the client, 1211 # it may embed the `Status` in the normal response to indicate the partial 1212 # errors. 1213 # 1214 # - Workflow errors. A typical workflow has multiple steps. Each step may 1215 # have a `Status` message for error reporting. 1216 # 1217 # - Batch operations. If a client uses batch request and batch response, the 1218 # `Status` message should be used directly inside batch response, one for 1219 # each error sub-response. 1220 # 1221 # - Asynchronous operations. If an API call embeds asynchronous operation 1222 # results in its response, the status of those operations should be 1223 # represented directly using the `Status` message. 1224 # 1225 # - Logging. If some API errors are stored in logs, the message `Status` could 1226 # be used directly after any stripping needed for security/privacy reasons. 1227 "message": "A String", # A developer-facing error message, which should be in English. Any 1228 # user-facing error message should be localized and sent in the 1229 # google.rpc.Status.details field, or localized by the client. 1230 "code": 42, # The status code, which should be an enum value of google.rpc.Code. 1231 "details": [ # A list of messages that carry the error details. There will be a 1232 # common set of message types for APIs to use. 1233 { 1234 "a_key": "", # Properties of the object. Contains field @type with type URL. 1235 }, 1236 ], 1237 }, 1238 ], 1239 "sourceOperationResponse": { # The result of a SourceOperationRequest, specified in # If the work item represented a SourceOperationRequest, and the work 1240 # is completed, contains the result of the operation. 1241 # ReportWorkItemStatusRequest.source_operation when the work item 1242 # is completed. 1243 "getMetadata": { # The result of a SourceGetMetadataOperation. # A response to a request to get metadata about a source. 1244 "metadata": { # Metadata about a Source useful for automatically optimizing # The computed metadata. 1245 # and tuning the pipeline, etc. 1246 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1247 # (this is a streaming source). 1248 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1249 # read from this source. This estimate is in terms of external storage 1250 # size, before any decompression or other processing done by the reader. 1251 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1252 # the (encoded) keys in lexicographically sorted order. 1253 }, 1254 }, 1255 "split": { # The response to a SourceSplitRequest. # A response to a request to split a source. 
1256 "outcome": "A String", # Indicates whether splitting happened and produced a list of bundles. 1257 # If this is USE_CURRENT_SOURCE_AS_IS, the current source should 1258 # be processed "as is" without splitting. "bundles" is ignored in this case. 1259 # If this is SPLITTING_HAPPENED, then "bundles" contains a list of 1260 # bundles into which the source was split. 1261 "bundles": [ # If outcome is SPLITTING_HAPPENED, then this is a list of bundles 1262 # into which the source was split. Otherwise this field is ignored. 1263 # This list can be empty, which means the source represents an empty input. 1264 { # Specification of one of the bundles produced as a result of splitting 1265 # a Source (e.g. when executing a SourceSplitRequest, or when 1266 # splitting an active task using WorkItemStatus.dynamic_source_split), 1267 # relative to the source being split. 1268 "derivationMode": "A String", # What source to base the produced source on (if any). 1269 "source": { # A source that records can be read and decoded from. # Specification of the source. 1270 "codec": { # The codec to use to decode data read from the source. 1271 "a_key": "", # Properties of the object. 1272 }, 1273 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1274 # as differences against another source, in order to save backend-side 1275 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1276 # To support this use case, the full set of parameters of the source 1277 # is logically obtained by taking the latest explicitly specified value 1278 # of each parameter in the order: 1279 # base_specs (later items win), spec (overrides anything in base_specs). 1280 { 1281 "a_key": "", # Properties of the object. 1282 }, 1283 ], 1284 "spec": { # The source to read from, plus its parameters. 1285 "a_key": "", # Properties of the object. 1286 }, 1287 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1288 # doesn't need splitting, and using SourceSplitRequest on it would 1289 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1290 # 1291 # E.g. a file splitter may set this to true when splitting a single file 1292 # into a set of byte ranges of appropriate size, and set this 1293 # to false when splitting a filepattern into individual files. 1294 # However, for efficiency, a file splitter may decide to produce 1295 # file subranges directly from the filepattern to avoid a splitting 1296 # round-trip. 1297 # 1298 # See SourceSplitRequest for an overview of the splitting process. 1299 # 1300 # This field is meaningful only in the Source objects populated 1301 # by the user (e.g. when filling in a DerivedSource). 1302 # Source objects supplied by the framework to the user don't have 1303 # this field populated. 1304 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1305 # avoiding a SourceGetMetadataOperation roundtrip 1306 # (see SourceOperationRequest). 1307 # 1308 # This field is meaningful only in the Source objects populated 1309 # by the user (e.g. when filling in a DerivedSource). 1310 # Source objects supplied by the framework to the user don't have 1311 # this field populated. 1312 # and tuning the pipeline, etc. 1313 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1314 # (this is a streaming source). 
1315 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1316 # read from this source. This estimate is in terms of external storage 1317 # size, before any decompression or other processing done by the reader. 1318 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1319 # the (encoded) keys in lexicographically sorted order. 1320 }, 1321 }, 1322 }, 1323 ], 1324 "shards": [ # DEPRECATED in favor of bundles. 1325 { # DEPRECATED in favor of DerivedSource. 1326 "derivationMode": "A String", # DEPRECATED 1327 "source": { # A source that records can be read and decoded from. # DEPRECATED 1328 "codec": { # The codec to use to decode data read from the source. 1329 "a_key": "", # Properties of the object. 1330 }, 1331 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1332 # as differences against another source, in order to save backend-side 1333 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1334 # To support this use case, the full set of parameters of the source 1335 # is logically obtained by taking the latest explicitly specified value 1336 # of each parameter in the order: 1337 # base_specs (later items win), spec (overrides anything in base_specs). 1338 { 1339 "a_key": "", # Properties of the object. 1340 }, 1341 ], 1342 "spec": { # The source to read from, plus its parameters. 1343 "a_key": "", # Properties of the object. 1344 }, 1345 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1346 # doesn't need splitting, and using SourceSplitRequest on it would 1347 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1348 # 1349 # E.g. a file splitter may set this to true when splitting a single file 1350 # into a set of byte ranges of appropriate size, and set this 1351 # to false when splitting a filepattern into individual files. 1352 # However, for efficiency, a file splitter may decide to produce 1353 # file subranges directly from the filepattern to avoid a splitting 1354 # round-trip. 1355 # 1356 # See SourceSplitRequest for an overview of the splitting process. 1357 # 1358 # This field is meaningful only in the Source objects populated 1359 # by the user (e.g. when filling in a DerivedSource). 1360 # Source objects supplied by the framework to the user don't have 1361 # this field populated. 1362 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1363 # avoiding a SourceGetMetadataOperation roundtrip 1364 # (see SourceOperationRequest). 1365 # 1366 # This field is meaningful only in the Source objects populated 1367 # by the user (e.g. when filling in a DerivedSource). 1368 # Source objects supplied by the framework to the user don't have 1369 # this field populated. 1370 # and tuning the pipeline, etc. 1371 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1372 # (this is a streaming source). 1373 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1374 # read from this source. This estimate is in terms of external storage 1375 # size, before any decompression or other processing done by the reader. 1376 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1377 # the (encoded) keys in lexicographically sorted order. 
1378 }, 1379 }, 1380 }, 1381 ], 1382 }, 1383 }, 1384 "stopPosition": { # Position defines a position within a collection of data. The value # A worker may split an active map task in two parts, "primary" and 1385 # "residual", continuing to process the primary part and returning the 1386 # residual part into the pool of available work. 1387 # This event is called a "dynamic split" and is critical to the dynamic 1388 # work rebalancing feature. The two obtained sub-tasks are called 1389 # "parts" of the split. 1390 # The parts, if concatenated, must represent the same input as would 1391 # be read by the current task if the split did not happen. 1392 # The exact way in which the original task is decomposed into the two 1393 # parts is specified either as a position demarcating them 1394 # (stop_position), or explicitly as two DerivedSources, if this 1395 # task consumes a user-defined source type (dynamic_source_split). 1396 # 1397 # The "current" task is adjusted as a result of the split: after a task 1398 # with range [A, B) sends a stop_position update at C, its range is 1399 # considered to be [A, C), e.g.: 1400 # * Progress should be interpreted relative to the new range, e.g. 1401 # "75% completed" means "75% of [A, C) completed" 1402 # * The worker should interpret proposed_stop_position relative to the 1403 # new range, e.g. "split at 68%" should be interpreted as 1404 # "split at 68% of [A, C)". 1405 # * If the worker chooses to split again using stop_position, only 1406 # stop_positions in [A, C) will be accepted. 1407 # * Etc. 1408 # dynamic_source_split has similar semantics: e.g., if a task with 1409 # source S splits using dynamic_source_split into {P, R} 1410 # (where P and R must be together equivalent to S), then subsequent 1411 # progress and proposed_stop_position should be interpreted relative 1412 # to P, and in a potential subsequent dynamic_source_split into {P', R'}, 1413 # P' and R' must be together equivalent to P, etc. 1414 # can be either the end position, a key (used with ordered 1415 # collections), a byte offset, or a record index. 1416 "end": True or False, # Position is past all other positions. Also useful for the end 1417 # position of an unbounded range. 1418 "recordIndex": "A String", # Position is a record index. 1419 "byteOffset": "A String", # Position is a byte offset. 1420 "key": "A String", # Position is a string key, ordered lexicographically. 1421 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 1422 # position. A ConcatPosition can be used by a reader of a source that 1423 # encapsulates a set of other sources. 1424 "position": # Object with schema name: Position # Position within the inner source. 1425 "index": 42, # Index of the inner source. 1426 }, 1427 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 1428 # sharding). 1429 }, 1430 "sourceFork": { # DEPRECATED in favor of DynamicSourceSplit. # DEPRECATED in favor of dynamic_source_split. 1431 "residualSource": { # Specification of one of the bundles produced as a result of splitting # DEPRECATED 1432 # a Source (e.g. when executing a SourceSplitRequest, or when 1433 # splitting an active task using WorkItemStatus.dynamic_source_split), 1434 # relative to the source being split. 1435 "derivationMode": "A String", # What source to base the produced source on (if any). 1436 "source": { # A source that records can be read and decoded from. 
# Specification of the source. 1437 "codec": { # The codec to use to decode data read from the source. 1438 "a_key": "", # Properties of the object. 1439 }, 1440 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1441 # as differences against another source, in order to save backend-side 1442 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1443 # To support this use case, the full set of parameters of the source 1444 # is logically obtained by taking the latest explicitly specified value 1445 # of each parameter in the order: 1446 # base_specs (later items win), spec (overrides anything in base_specs). 1447 { 1448 "a_key": "", # Properties of the object. 1449 }, 1450 ], 1451 "spec": { # The source to read from, plus its parameters. 1452 "a_key": "", # Properties of the object. 1453 }, 1454 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1455 # doesn't need splitting, and using SourceSplitRequest on it would 1456 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1457 # 1458 # E.g. a file splitter may set this to true when splitting a single file 1459 # into a set of byte ranges of appropriate size, and set this 1460 # to false when splitting a filepattern into individual files. 1461 # However, for efficiency, a file splitter may decide to produce 1462 # file subranges directly from the filepattern to avoid a splitting 1463 # round-trip. 1464 # 1465 # See SourceSplitRequest for an overview of the splitting process. 1466 # 1467 # This field is meaningful only in the Source objects populated 1468 # by the user (e.g. when filling in a DerivedSource). 1469 # Source objects supplied by the framework to the user don't have 1470 # this field populated. 1471 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1472 # avoiding a SourceGetMetadataOperation roundtrip 1473 # (see SourceOperationRequest). 1474 # 1475 # This field is meaningful only in the Source objects populated 1476 # by the user (e.g. when filling in a DerivedSource). 1477 # Source objects supplied by the framework to the user don't have 1478 # this field populated. 1479 # and tuning the pipeline, etc. 1480 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1481 # (this is a streaming source). 1482 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1483 # read from this source. This estimate is in terms of external storage 1484 # size, before any decompression or other processing done by the reader. 1485 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1486 # the (encoded) keys in lexicographically sorted order. 1487 }, 1488 }, 1489 }, 1490 "primarySource": { # Specification of one of the bundles produced as a result of splitting # DEPRECATED 1491 # a Source (e.g. when executing a SourceSplitRequest, or when 1492 # splitting an active task using WorkItemStatus.dynamic_source_split), 1493 # relative to the source being split. 1494 "derivationMode": "A String", # What source to base the produced source on (if any). 1495 "source": { # A source that records can be read and decoded from. # Specification of the source. 1496 "codec": { # The codec to use to decode data read from the source. 1497 "a_key": "", # Properties of the object. 
1498 }, 1499 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1500 # as differences against another source, in order to save backend-side 1501 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1502 # To support this use case, the full set of parameters of the source 1503 # is logically obtained by taking the latest explicitly specified value 1504 # of each parameter in the order: 1505 # base_specs (later items win), spec (overrides anything in base_specs). 1506 { 1507 "a_key": "", # Properties of the object. 1508 }, 1509 ], 1510 "spec": { # The source to read from, plus its parameters. 1511 "a_key": "", # Properties of the object. 1512 }, 1513 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1514 # doesn't need splitting, and using SourceSplitRequest on it would 1515 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1516 # 1517 # E.g. a file splitter may set this to true when splitting a single file 1518 # into a set of byte ranges of appropriate size, and set this 1519 # to false when splitting a filepattern into individual files. 1520 # However, for efficiency, a file splitter may decide to produce 1521 # file subranges directly from the filepattern to avoid a splitting 1522 # round-trip. 1523 # 1524 # See SourceSplitRequest for an overview of the splitting process. 1525 # 1526 # This field is meaningful only in the Source objects populated 1527 # by the user (e.g. when filling in a DerivedSource). 1528 # Source objects supplied by the framework to the user don't have 1529 # this field populated. 1530 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1531 # avoiding a SourceGetMetadataOperation roundtrip 1532 # (see SourceOperationRequest). 1533 # 1534 # This field is meaningful only in the Source objects populated 1535 # by the user (e.g. when filling in a DerivedSource). 1536 # Source objects supplied by the framework to the user don't have 1537 # this field populated. 1538 # and tuning the pipeline, etc. 1539 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1540 # (this is a streaming source). 1541 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1542 # read from this source. This estimate is in terms of external storage 1543 # size, before any decompression or other processing done by the reader. 1544 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1545 # the (encoded) keys in lexicographically sorted order. 1546 }, 1547 }, 1548 }, 1549 "residual": { # DEPRECATED in favor of DerivedSource. # DEPRECATED 1550 "derivationMode": "A String", # DEPRECATED 1551 "source": { # A source that records can be read and decoded from. # DEPRECATED 1552 "codec": { # The codec to use to decode data read from the source. 1553 "a_key": "", # Properties of the object. 1554 }, 1555 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1556 # as differences against another source, in order to save backend-side 1557 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1558 # To support this use case, the full set of parameters of the source 1559 # is logically obtained by taking the latest explicitly specified value 1560 # of each parameter in the order: 1561 # base_specs (later items win), spec (overrides anything in base_specs). 
1562 { 1563 "a_key": "", # Properties of the object. 1564 }, 1565 ], 1566 "spec": { # The source to read from, plus its parameters. 1567 "a_key": "", # Properties of the object. 1568 }, 1569 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1570 # doesn't need splitting, and using SourceSplitRequest on it would 1571 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1572 # 1573 # E.g. a file splitter may set this to true when splitting a single file 1574 # into a set of byte ranges of appropriate size, and set this 1575 # to false when splitting a filepattern into individual files. 1576 # However, for efficiency, a file splitter may decide to produce 1577 # file subranges directly from the filepattern to avoid a splitting 1578 # round-trip. 1579 # 1580 # See SourceSplitRequest for an overview of the splitting process. 1581 # 1582 # This field is meaningful only in the Source objects populated 1583 # by the user (e.g. when filling in a DerivedSource). 1584 # Source objects supplied by the framework to the user don't have 1585 # this field populated. 1586 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1587 # avoiding a SourceGetMetadataOperation roundtrip 1588 # (see SourceOperationRequest). 1589 # 1590 # This field is meaningful only in the Source objects populated 1591 # by the user (e.g. when filling in a DerivedSource). 1592 # Source objects supplied by the framework to the user don't have 1593 # this field populated. 1594 # and tuning the pipeline, etc. 1595 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1596 # (this is a streaming source). 1597 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1598 # read from this source. This estimate is in terms of external storage 1599 # size, before any decompression or other processing done by the reader. 1600 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1601 # the (encoded) keys in lexicographically sorted order. 1602 }, 1603 }, 1604 }, 1605 "primary": { # DEPRECATED in favor of DerivedSource. # DEPRECATED 1606 "derivationMode": "A String", # DEPRECATED 1607 "source": { # A source that records can be read and decoded from. # DEPRECATED 1608 "codec": { # The codec to use to decode data read from the source. 1609 "a_key": "", # Properties of the object. 1610 }, 1611 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1612 # as differences against another source, in order to save backend-side 1613 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1614 # To support this use case, the full set of parameters of the source 1615 # is logically obtained by taking the latest explicitly specified value 1616 # of each parameter in the order: 1617 # base_specs (later items win), spec (overrides anything in base_specs). 1618 { 1619 "a_key": "", # Properties of the object. 1620 }, 1621 ], 1622 "spec": { # The source to read from, plus its parameters. 1623 "a_key": "", # Properties of the object. 1624 }, 1625 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1626 # doesn't need splitting, and using SourceSplitRequest on it would 1627 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1628 # 1629 # E.g. 
a file splitter may set this to true when splitting a single file 1630 # into a set of byte ranges of appropriate size, and set this 1631 # to false when splitting a filepattern into individual files. 1632 # However, for efficiency, a file splitter may decide to produce 1633 # file subranges directly from the filepattern to avoid a splitting 1634 # round-trip. 1635 # 1636 # See SourceSplitRequest for an overview of the splitting process. 1637 # 1638 # This field is meaningful only in the Source objects populated 1639 # by the user (e.g. when filling in a DerivedSource). 1640 # Source objects supplied by the framework to the user don't have 1641 # this field populated. 1642 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1643 # avoiding a SourceGetMetadataOperation roundtrip 1644 # (see SourceOperationRequest). 1645 # 1646 # This field is meaningful only in the Source objects populated 1647 # by the user (e.g. when filling in a DerivedSource). 1648 # Source objects supplied by the framework to the user don't have 1649 # this field populated. 1650 # and tuning the pipeline, etc. 1651 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1652 # (this is a streaming source). 1653 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1654 # read from this source. This estimate is in terms of external storage 1655 # size, before any decompression or other processing done by the reader. 1656 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1657 # the (encoded) keys in lexicographically sorted order. 1658 }, 1659 }, 1660 }, 1661 }, 1662 "requestedLeaseDuration": "A String", # Amount of time the worker requests for its lease. 1663 "completed": True or False, # True if the WorkItem was completed (successfully or unsuccessfully). 1664 "workItemId": "A String", # Identifies the WorkItem. 1665 "dynamicSourceSplit": { # When a task splits using WorkItemStatus.dynamic_source_split, this # See documentation of stop_position. 1666 # message describes the two parts of the split relative to the 1667 # description of the current task's input. 1668 "residual": { # Specification of one of the bundles produced as a result of splitting # Residual part (returned to the pool of work). 1669 # Specified relative to the previously-current source. 1670 # a Source (e.g. when executing a SourceSplitRequest, or when 1671 # splitting an active task using WorkItemStatus.dynamic_source_split), 1672 # relative to the source being split. 1673 "derivationMode": "A String", # What source to base the produced source on (if any). 1674 "source": { # A source that records can be read and decoded from. # Specification of the source. 1675 "codec": { # The codec to use to decode data read from the source. 1676 "a_key": "", # Properties of the object. 1677 }, 1678 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1679 # as differences against another source, in order to save backend-side 1680 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1681 # To support this use case, the full set of parameters of the source 1682 # is logically obtained by taking the latest explicitly specified value 1683 # of each parameter in the order: 1684 # base_specs (later items win), spec (overrides anything in base_specs). 1685 { 1686 "a_key": "", # Properties of the object. 
1687 }, 1688 ], 1689 "spec": { # The source to read from, plus its parameters. 1690 "a_key": "", # Properties of the object. 1691 }, 1692 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1693 # doesn't need splitting, and using SourceSplitRequest on it would 1694 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1695 # 1696 # E.g. a file splitter may set this to true when splitting a single file 1697 # into a set of byte ranges of appropriate size, and set this 1698 # to false when splitting a filepattern into individual files. 1699 # However, for efficiency, a file splitter may decide to produce 1700 # file subranges directly from the filepattern to avoid a splitting 1701 # round-trip. 1702 # 1703 # See SourceSplitRequest for an overview of the splitting process. 1704 # 1705 # This field is meaningful only in the Source objects populated 1706 # by the user (e.g. when filling in a DerivedSource). 1707 # Source objects supplied by the framework to the user don't have 1708 # this field populated. 1709 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1710 # avoiding a SourceGetMetadataOperation roundtrip 1711 # (see SourceOperationRequest). 1712 # 1713 # This field is meaningful only in the Source objects populated 1714 # by the user (e.g. when filling in a DerivedSource). 1715 # Source objects supplied by the framework to the user don't have 1716 # this field populated. 1717 # and tuning the pipeline, etc. 1718 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1719 # (this is a streaming source). 1720 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1721 # read from this source. This estimate is in terms of external storage 1722 # size, before any decompression or other processing done by the reader. 1723 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1724 # the (encoded) keys in lexicographically sorted order. 1725 }, 1726 }, 1727 }, 1728 "primary": { # Specification of one of the bundles produced as a result of splitting # Primary part (continued to be processed by worker). 1729 # Specified relative to the previously-current source. 1730 # Becomes current. 1731 # a Source (e.g. when executing a SourceSplitRequest, or when 1732 # splitting an active task using WorkItemStatus.dynamic_source_split), 1733 # relative to the source being split. 1734 "derivationMode": "A String", # What source to base the produced source on (if any). 1735 "source": { # A source that records can be read and decoded from. # Specification of the source. 1736 "codec": { # The codec to use to decode data read from the source. 1737 "a_key": "", # Properties of the object. 1738 }, 1739 "baseSpecs": [ # While splitting, sources may specify the produced bundles 1740 # as differences against another source, in order to save backend-side 1741 # memory and allow bigger jobs. For details, see SourceSplitRequest. 1742 # To support this use case, the full set of parameters of the source 1743 # is logically obtained by taking the latest explicitly specified value 1744 # of each parameter in the order: 1745 # base_specs (later items win), spec (overrides anything in base_specs). 1746 { 1747 "a_key": "", # Properties of the object. 1748 }, 1749 ], 1750 "spec": { # The source to read from, plus its parameters. 
1751 "a_key": "", # Properties of the object. 1752 }, 1753 "doesNotNeedSplitting": True or False, # Setting this value to true hints to the framework that the source 1754 # doesn't need splitting, and using SourceSplitRequest on it would 1755 # yield SOURCE_SPLIT_OUTCOME_USE_CURRENT. 1756 # 1757 # E.g. a file splitter may set this to true when splitting a single file 1758 # into a set of byte ranges of appropriate size, and set this 1759 # to false when splitting a filepattern into individual files. 1760 # However, for efficiency, a file splitter may decide to produce 1761 # file subranges directly from the filepattern to avoid a splitting 1762 # round-trip. 1763 # 1764 # See SourceSplitRequest for an overview of the splitting process. 1765 # 1766 # This field is meaningful only in the Source objects populated 1767 # by the user (e.g. when filling in a DerivedSource). 1768 # Source objects supplied by the framework to the user don't have 1769 # this field populated. 1770 "metadata": { # Metadata about a Source useful for automatically optimizing # Optionally, metadata for this source can be supplied right away, 1771 # avoiding a SourceGetMetadataOperation roundtrip 1772 # (see SourceOperationRequest). 1773 # 1774 # This field is meaningful only in the Source objects populated 1775 # by the user (e.g. when filling in a DerivedSource). 1776 # Source objects supplied by the framework to the user don't have 1777 # this field populated. 1778 # and tuning the pipeline, etc. 1779 "infinite": True or False, # Specifies that the size of this source is known to be infinite 1780 # (this is a streaming source). 1781 "estimatedSizeBytes": "A String", # An estimate of the total size (in bytes) of the data that would be 1782 # read from this source. This estimate is in terms of external storage 1783 # size, before any decompression or other processing done by the reader. 1784 "producesSortedKeys": True or False, # Whether this source is known to produce key/value pairs with 1785 # the (encoded) keys in lexicographically sorted order. 1786 }, 1787 }, 1788 }, 1789 }, 1790 "counterUpdates": [ # Worker output counters for this WorkItem. 1791 { # An update to a Counter sent from a worker. 1792 "floatingPointList": { # A metric value representing a list of floating point numbers. # List of floating point numbers, for Set. 1793 "elements": [ # Elements of the list. 1794 3.14, 1795 ], 1796 }, 1797 "floatingPoint": 3.14, # Floating point value for Sum, Max, Min. 1798 "integerMean": { # A representation of an integer mean metric contribution. # Integer mean aggregation value for Mean. 1799 "count": { # A representation of an int64, n, that is immune to precision loss when # The number of values being aggregated. 1800 # encoded in JSON. 1801 "lowBits": 42, # The low order bits: n & 0xffffffff. 1802 "highBits": 42, # The high order bits, including the sign: n >> 32. 1803 }, 1804 "sum": { # A representation of an int64, n, that is immune to precision loss when # The sum of all values being aggregated. 1805 # encoded in JSON. 1806 "lowBits": 42, # The low order bits: n & 0xffffffff. 1807 "highBits": 42, # The high order bits, including the sign: n >> 32. 1808 }, 1809 }, 1810 "boolean": True or False, # Boolean value for And, Or. 1811 "integerList": { # A metric value representing a list of integers. # List of integers, for Set. 1812 "elements": [ # Elements of the list. 1813 { # A representation of an int64, n, that is immune to precision loss when 1814 # encoded in JSON. 
1815 "lowBits": 42, # The low order bits: n & 0xffffffff. 1816 "highBits": 42, # The high order bits, including the sign: n >> 32. 1817 }, 1818 ], 1819 }, 1820 "cumulative": True or False, # True if this counter is reported as the total cumulative aggregate 1821 # value accumulated since the worker started working on this WorkItem. 1822 # By default this is false, indicating that this counter is reported 1823 # as a delta. 1824 "shortId": "A String", # The service-generated short identifier for this counter. 1825 # The short_id -> (name, metadata) mapping is constant for the lifetime of 1826 # a job. 1827 "floatingPointMean": { # A representation of a floating point mean metric contribution. # Floating point mean aggregation value for Mean. 1828 "count": { # A representation of an int64, n, that is immune to precision loss when # The number of values being aggregated. 1829 # encoded in JSON. 1830 "lowBits": 42, # The low order bits: n & 0xffffffff. 1831 "highBits": 42, # The high order bits, including the sign: n >> 32. 1832 }, 1833 "sum": 3.14, # The sum of all values being aggregated. 1834 }, 1835 "internal": "", # Value for internally-defined counters used by the Dataflow service. 1836 "structuredNameAndMetadata": { # A single message which encapsulates structured name and metadata for a given # Counter structured name and metadata. 1837 # counter. 1838 "name": { # Identifies a counter within a per-job namespace. Counters whose structured # Structured name of the counter. 1839 # names are the same get merged into a single value for the job. 1840 "origin": "A String", # One of the standard Origins defined above. 1841 "executionStepName": "A String", # Name of the stage. An execution step contains multiple component steps. 1842 "name": "A String", # Counter name. Not necessarily globally-unique, but unique within the 1843 # context of the other fields. 1844 # Required. 1845 "workerId": "A String", # ID of a particular worker. 1846 "originalStepName": "A String", # System generated name of the original step in the user's graph, before 1847 # optimization. 1848 "originNamespace": "A String", # A string containing a more specific namespace of the counter's origin. 1849 "portion": "A String", # Portion of this counter, either key or value. 1850 "componentStepName": "A String", # Name of the optimized step being executed by the workers. 1851 }, 1852 "metadata": { # CounterMetadata includes all static non-name non-value counter attributes. # Metadata associated with a counter 1853 "standardUnits": "A String", # System defined Units, see above enum. 1854 "kind": "A String", # Counter aggregation kind. 1855 "otherUnits": "A String", # A string referring to the unit type. 1856 "description": "A String", # Human-readable description of the counter semantics. 1857 }, 1858 }, 1859 "nameAndKind": { # Basic metadata about a counter. # Counter name and aggregation type. 1860 "kind": "A String", # Counter aggregation kind. 1861 "name": "A String", # Name of the counter. 1862 }, 1863 "integer": { # A representation of an int64, n, that is immune to precision loss when # Integer value for Sum, Max, Min. 1864 # encoded in JSON. 1865 "lowBits": 42, # The low order bits: n & 0xffffffff. 1866 "highBits": 42, # The high order bits, including the sign: n >> 32. 1867 }, 1868 "distribution": { # A metric value representing a distribution. # Distribution data 1869 "count": { # A representation of an int64, n, that is immune to precision loss when # The count of the number of elements present in the distribution. 
1870 # encoded in JSON. 1871 "lowBits": 42, # The low order bits: n & 0xffffffff. 1872 "highBits": 42, # The high order bits, including the sign: n >> 32. 1873 }, 1874 "min": { # A representation of an int64, n, that is immune to precision loss when # The minimum value present in the distribution. 1875 # encoded in JSON. 1876 "lowBits": 42, # The low order bits: n & 0xffffffff. 1877 "highBits": 42, # The high order bits, including the sign: n >> 32. 1878 }, 1879 "max": { # A representation of an int64, n, that is immune to precision loss when # The maximum value present in the distribution. 1880 # encoded in JSON. 1881 "lowBits": 42, # The low order bits: n & 0xffffffff. 1882 "highBits": 42, # The high order bits, including the sign: n >> 32. 1883 }, 1884 "sum": { # A representation of an int64, n, that is immune to precision loss when # Use an int64 since we'd prefer the added precision. If overflow is a common 1885 # problem we can detect it and use an additional int64 or a double. 1886 # encoded in JSON. 1887 "lowBits": 42, # The low order bits: n & 0xffffffff. 1888 "highBits": 42, # The high order bits, including the sign: n >> 32. 1889 }, 1890 "sumOfSquares": 3.14, # Use a double since the sum of squares is likely to overflow int64. 1891 "logBuckets": [ # (Optional) Logarithmic histogram of values. 1892 # Each log may be in no more than one bucket. Order does not matter. 1893 { # Bucket of values for Distribution's logarithmic histogram. 1894 "count": "A String", # Number of values in this bucket. 1895 "log": 42, # floor(log2(value)); defined to be zero for nonpositive values. 1896 # log(-1) = 0 1897 # log(0) = 0 1898 # log(1) = 0 1899 # log(2) = 1 1900 # log(3) = 1 1901 # log(4) = 2 1902 # log(5) = 2 1903 }, 1904 ], 1905 }, 1906 "stringList": { # A metric value representing a list of strings. # List of strings, for Set. 1907 "elements": [ # Elements of the list. 1908 "A String", 1909 ], 1910 }, 1911 }, 1912 ], 1913 "progress": { # Obsolete in favor of ApproximateReportedProgress and ApproximateSplitRequest. # DEPRECATED in favor of reported_progress. 1914 "position": { # Position defines a position within a collection of data. The value # Obsolete. 1915 # can be either the end position, a key (used with ordered 1916 # collections), a byte offset, or a record index. 1917 "end": True or False, # Position is past all other positions. Also useful for the end 1918 # position of an unbounded range. 1919 "recordIndex": "A String", # Position is a record index. 1920 "byteOffset": "A String", # Position is a byte offset. 1921 "key": "A String", # Position is a string key, ordered lexicographically. 1922 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 1923 # position. A ConcatPosition can be used by a reader of a source that 1924 # encapsulates a set of other sources. 1925 "position": # Object with schema name: Position # Position within the inner source. 1926 "index": 42, # Index of the inner source. 1927 }, 1928 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 1929 # sharding). 1930 }, 1931 "remainingTime": "A String", # Obsolete. 1932 "percentComplete": 3.14, # Obsolete. 1933 }, 1934 "metricUpdates": [ # DEPRECATED in favor of counter_updates. 1935 { # Describes the state of a metric. 1936 "meanCount": "", # Worker-computed aggregate value for the "Mean" aggregation kind. 
                # This holds the count of the aggregated values and is used in combination
                # with mean_sum above to obtain the actual mean aggregate value.
                # The only possible value type is Long.
            "updateTime": "A String", # Timestamp associated with the metric value. Optional when workers are
                # reporting work progress; it will be filled in responses from the
                # metrics API.
            "set": "", # Worker-computed aggregate value for the "Set" aggregation kind. The only
                # possible value type is a list of Values whose type can be Long, Double,
                # or String, according to the metric's type. All Values in the list must
                # be of the same type.
            "name": { # Identifies a metric, by describing the source which generated the # Name of the metric.
                # metric.
              "origin": "A String", # Origin (namespace) of metric name. May be blank for user-defined metrics;
                  # will be "dataflow" for metrics defined by the Dataflow service or SDK.
              "name": "A String", # Worker-defined metric name.
              "context": { # Zero or more labeled fields which identify the part of the job this
                  # metric is associated with, such as the name of a step or collection.
                  #
                  # For example, built-in counters associated with steps will have
                  # context['step'] = <step-name>. Counters associated with PCollections
                  # in the SDK will have context['pcollection'] = <pcollection-name>.
                "a_key": "A String",
              },
            },
            "cumulative": True or False, # True if this metric is reported as the total cumulative aggregate
                # value accumulated since the worker started working on this WorkItem.
                # By default this is false, indicating that this metric is reported
                # as a delta that is not associated with any WorkItem.
            "kind": "A String", # Metric aggregation kind. The possible metric aggregation kinds are
                # "Sum", "Max", "Min", "Mean", "Set", "And", "Or", and "Distribution".
                # The specified aggregation kind is case-insensitive.
                #
                # If omitted, this is not an aggregated value but instead
                # a single metric sample value.
            "scalar": "", # Worker-computed aggregate value for aggregation kinds "Sum", "Max", "Min",
                # "And", and "Or". The possible value types are Long, Double, and Boolean.
            "meanSum": "", # Worker-computed aggregate value for the "Mean" aggregation kind.
                # This holds the sum of the aggregated values and is used in combination
                # with mean_count below to obtain the actual mean aggregate value.
                # The only possible value types are Long and Double.
            "distribution": "", # A struct value describing properties of a distribution of numeric values.
            "internal": "", # Worker-computed aggregate value for internal use by the Dataflow
                # service.
          },
        ],
        "reportedProgress": { # A progress measurement of a WorkItem by a worker. # The worker's progress through this WorkItem.
          "fractionConsumed": 3.14, # Completion as fraction of the input consumed, from 0.0 (beginning, nothing
              # consumed), to 1.0 (end of the input, entire input consumed).
          "position": { # Position defines a position within a collection of data. The value # A Position within the work to represent a progress.
              # can be either the end position, a key (used with ordered
              # collections), a byte offset, or a record index.
            "end": True or False, # Position is past all other positions. Also useful for the end
                # position of an unbounded range.
            "recordIndex": "A String", # Position is a record index.
1991 "byteOffset": "A String", # Position is a byte offset. 1992 "key": "A String", # Position is a string key, ordered lexicographically. 1993 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 1994 # position. A ConcatPosition can be used by a reader of a source that 1995 # encapsulates a set of other sources. 1996 "position": # Object with schema name: Position # Position within the inner source. 1997 "index": 42, # Index of the inner source. 1998 }, 1999 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 2000 # sharding). 2001 }, 2002 "remainingParallelism": { # Represents the level of parallelism in a WorkItem's input, # Total amount of parallelism in the input of this task that remains, 2003 # (i.e. can be delegated to this task and any new tasks via dynamic 2004 # splitting). Always at least 1 for non-finished work items and 0 for 2005 # finished. 2006 # 2007 # "Amount of parallelism" refers to how many non-empty parts of the input 2008 # can be read in parallel. This does not necessarily equal number 2009 # of records. An input that can be read in parallel down to the 2010 # individual records is called "perfectly splittable". 2011 # An example of non-perfectly parallelizable input is a block-compressed 2012 # file format where a block of records has to be read as a whole, 2013 # but different blocks can be read in parallel. 2014 # 2015 # Examples: 2016 # * If we are processing record #30 (starting at 1) out of 50 in a perfectly 2017 # splittable 50-record input, this value should be 21 (20 remaining + 1 2018 # current). 2019 # * If we are reading through block 3 in a block-compressed file consisting 2020 # of 5 blocks, this value should be 3 (since blocks 4 and 5 can be 2021 # processed in parallel by new tasks via dynamic splitting and the current 2022 # task remains processing block 3). 2023 # * If we are reading through the last block in a block-compressed file, 2024 # or reading or processing the last record in a perfectly splittable 2025 # input, this value should be 1, because apart from the current task, no 2026 # additional remainder can be split off. 2027 # reported by the worker. 2028 "isInfinite": True or False, # Specifies whether the parallelism is infinite. If true, "value" is 2029 # ignored. 2030 # Infinite parallelism means the service will assume that the work item 2031 # can always be split into more non-empty work items by dynamic splitting. 2032 # This is a work-around for lack of support for infinity by the current 2033 # JSON-based Java RPC stack. 2034 "value": 3.14, # Specifies the level of parallelism in case it is finite. 2035 }, 2036 "consumedParallelism": { # Represents the level of parallelism in a WorkItem's input, # Total amount of parallelism in the portion of input of this task that has 2037 # already been consumed and is no longer active. In the first two examples 2038 # above (see remaining_parallelism), the value should be 29 or 2 2039 # respectively. The sum of remaining_parallelism and consumed_parallelism 2040 # should equal the total amount of parallelism in this work item. If 2041 # specified, must be finite. 2042 # reported by the worker. 2043 "isInfinite": True or False, # Specifies whether the parallelism is infinite. If true, "value" is 2044 # ignored. 2045 # Infinite parallelism means the service will assume that the work item 2046 # can always be split into more non-empty work items by dynamic splitting. 
                # This is a work-around for lack of support for infinity by the current
                # JSON-based Java RPC stack.
            "value": 3.14, # Specifies the level of parallelism in case it is finite.
          },
        },
      },
    ],
    "location": "A String", # The location which contains the WorkItem's job.
  }

  x__xgafv: string, V1 error format.
    Allowed values
      1 - v1 error format
      2 - v2 error format

Returns:
  An object of the form:

    { # Response from a request to report the status of WorkItems.
    "workItemServiceStates": [ # A set of messages indicating the service-side state for each
        # WorkItem whose status was reported, in the same order as the
        # WorkItemStatus messages in the ReportWorkItemStatusRequest which
        # resulted in this response.
      { # The Dataflow service's idea of the current state of a WorkItem
          # being processed by a worker.
        "reportStatusInterval": "A String", # New recommended reporting interval.
        "suggestedStopPosition": { # Position defines a position within a collection of data. The value # Obsolete, always empty.
            # can be either the end position, a key (used with ordered
            # collections), a byte offset, or a record index.
          "end": True or False, # Position is past all other positions. Also useful for the end
              # position of an unbounded range.
          "recordIndex": "A String", # Position is a record index.
          "byteOffset": "A String", # Position is a byte offset.
          "key": "A String", # Position is a string key, ordered lexicographically.
          "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position.
              # position. A ConcatPosition can be used by a reader of a source that
              # encapsulates a set of other sources.
            "position": # Object with schema name: Position # Position within the inner source.
            "index": 42, # Index of the inner source.
          },
          "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED
              # sharding).
        },
        "harnessData": { # Other data returned by the service, specific to the particular
            # worker harness.
          "a_key": "", # Properties of the object.
        },
        "nextReportIndex": "A String", # The index value to use for the next report sent by the worker.
            # Note: If the report call fails for whatever reason, the worker should
            # reuse this index for subsequent report attempts.
        "leaseExpireTime": "A String", # Time at which the current lease will expire.
        "metricShortId": [ # The short ids that workers should use in subsequent metric updates.
            # Workers should strive to use short ids whenever possible, but it is ok
            # to request the short_id again if a worker lost track of it
            # (e.g. if the worker is recovering from a crash).
            # NOTE: it is possible that the response may have short ids for a subset
            # of the metrics.
          { # The metric short id is returned to the user alongside an offset into
              # ReportWorkItemStatusRequest
            "shortId": "A String", # The service-generated short identifier for the metric.
            "metricIndex": 42, # The index of the corresponding metric in
                # the ReportWorkItemStatusRequest. Required.
          },
        ],
        "splitRequest": { # A suggestion by the service to the worker to dynamically split the WorkItem. # The progress point in the WorkItem where the Dataflow service
            # suggests that the worker truncate the task.
2113 "fractionConsumed": 3.14, # A fraction at which to split the work item, from 0.0 (beginning of the 2114 # input) to 1.0 (end of the input). 2115 "position": { # Position defines a position within a collection of data. The value # A Position at which to split the work item. 2116 # can be either the end position, a key (used with ordered 2117 # collections), a byte offset, or a record index. 2118 "end": True or False, # Position is past all other positions. Also useful for the end 2119 # position of an unbounded range. 2120 "recordIndex": "A String", # Position is a record index. 2121 "byteOffset": "A String", # Position is a byte offset. 2122 "key": "A String", # Position is a string key, ordered lexicographically. 2123 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 2124 # position. A ConcatPosition can be used by a reader of a source that 2125 # encapsulates a set of other sources. 2126 "position": # Object with schema name: Position # Position within the inner source. 2127 "index": 42, # Index of the inner source. 2128 }, 2129 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 2130 # sharding). 2131 }, 2132 }, 2133 "suggestedStopPoint": { # Obsolete in favor of ApproximateReportedProgress and ApproximateSplitRequest. # DEPRECATED in favor of split_request. 2134 "position": { # Position defines a position within a collection of data. The value # Obsolete. 2135 # can be either the end position, a key (used with ordered 2136 # collections), a byte offset, or a record index. 2137 "end": True or False, # Position is past all other positions. Also useful for the end 2138 # position of an unbounded range. 2139 "recordIndex": "A String", # Position is a record index. 2140 "byteOffset": "A String", # Position is a byte offset. 2141 "key": "A String", # Position is a string key, ordered lexicographically. 2142 "concatPosition": { # A position that encapsulates an inner position and an index for the inner # CloudPosition is a concat position. 2143 # position. A ConcatPosition can be used by a reader of a source that 2144 # encapsulates a set of other sources. 2145 "position": # Object with schema name: Position # Position within the inner source. 2146 "index": 42, # Index of the inner source. 2147 }, 2148 "shufflePosition": "A String", # CloudPosition is a base64 encoded BatchShufflePosition (with FIXED 2149 # sharding). 2150 }, 2151 "remainingTime": "A String", # Obsolete. 2152 "percentComplete": 3.14, # Obsolete. 2153 }, 2154 }, 2155 ], 2156 }</pre> 2157</div> 2158 2159</body></html>