#!/usr/bin/env python
#
# Copyright 2016 - The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""A client that manages Google Compute Engine.

** ComputeClient **

ComputeClient is a wrapper around Google Compute Engine APIs.
It provides a set of methods for managing a google compute engine project,
such as creating images, creating instances, etc.

Design philosophy: We tried to make ComputeClient as stateless as possible,
and it only keeps states about authentication. ComputeClient should be very
generic, and only knows how to talk to Compute Engine APIs.
"""
import copy
import functools
import logging
import os

from acloud.internal.lib import base_cloud_client
from acloud.internal.lib import utils
from acloud.public import errors

logger = logging.getLogger(__name__)


class OperationScope(object):
    """Represents operation scope enum."""
    ZONE = "zone"
    REGION = "region"
    GLOBAL = "global"


class ComputeClient(base_cloud_client.BaseCloudApiClient):
    """Client that manages GCE."""

    # API settings, used by BaseCloudApiClient.
    API_NAME = "compute"
    API_VERSION = "v1"
    SCOPE = " ".join(["https://www.googleapis.com/auth/compute",
                      "https://www.googleapis.com/auth/devstorage.read_write"])
    # Default settings for gce operations
    DEFAULT_INSTANCE_SCOPE = [
        "https://www.googleapis.com/auth/devstorage.read_only",
        "https://www.googleapis.com/auth/logging.write"
    ]
    OPERATION_TIMEOUT_SECS = 15 * 60  # 15 mins
    OPERATION_POLL_INTERVAL_SECS = 5
    MACHINE_SIZE_METRICS = ["guestCpus", "memoryMb"]
    ACCESS_DENIED_CODE = 403

    def __init__(self, acloud_config, oauth2_credentials):
        """Initialize.

        Args:
            acloud_config: An AcloudConfig object.
            oauth2_credentials: An oauth2client.OAuth2Credentials instance.
        """
        super(ComputeClient, self).__init__(oauth2_credentials)
        self._project = acloud_config.project

    def _GetOperationStatus(self, operation, operation_scope, scope_name=None):
        """Get status of an operation.

        Args:
            operation: An Operation resource in the format of json.
            operation_scope: A value from OperationScope, "zone", "region",
                             or "global".
            scope_name: If operation_scope is "zone" or "region", this should be
                        the name of the zone or region, e.g. "us-central1-f".

        Returns:
            Status of the operation, one of "DONE", "PENDING", "RUNNING".

        Raises:
            errors.DriverError: if the operation fails.
            ValueError: if operation_scope is not a value of OperationScope.
        """
        operation_name = operation["name"]
        if operation_scope == OperationScope.GLOBAL:
            api = self.service.globalOperations().get(
                project=self._project, operation=operation_name)
        elif operation_scope == OperationScope.ZONE:
            api = self.service.zoneOperations().get(
                project=self._project,
                operation=operation_name,
                zone=scope_name)
        elif operation_scope == OperationScope.REGION:
            api = self.service.regionOperations().get(
                project=self._project,
                operation=operation_name,
                region=scope_name)
        else:
            # Without this branch an unknown scope would surface as an
            # UnboundLocalError on |result| below, hiding the real cause.
            raise ValueError(
                "Unknown operation scope: %r" % (operation_scope,))
        result = self.Execute(api)

        if result.get("error"):
            errors_list = result["error"]["errors"]
            raise errors.DriverError("Get operation state failed, errors: %s" %
                                     str(errors_list))
        return result["status"]

    def WaitOnOperation(self, operation, operation_scope, scope_name=None):
        """Wait for an operation to finish.

        Polls _GetOperationStatus until it returns "DONE" or until
        OPERATION_TIMEOUT_SECS has elapsed.

        Args:
            operation: An Operation resource in the format of json.
            operation_scope: A value from OperationScope, "zone", "region",
                             or "global".
            scope_name: If operation_scope is "zone" or "region", this should be
                        the name of the zone or region, e.g. "us-central1-f".
        """
        timeout_exception = errors.GceOperationTimeoutError(
            "Operation hits timeout, did not complete within %d secs." %
            self.OPERATION_TIMEOUT_SECS)
        utils.PollAndWait(
            func=self._GetOperationStatus,
            expected_return="DONE",
            timeout_exception=timeout_exception,
            timeout_secs=self.OPERATION_TIMEOUT_SECS,
            sleep_interval_secs=self.OPERATION_POLL_INTERVAL_SECS,
            operation=operation,
            operation_scope=operation_scope,
            scope_name=scope_name)

    def GetProject(self):
        """Get project information.

        Returns:
            A project resource in json.
        """
        api = self.service.projects().get(project=self._project)
        return self.Execute(api)

    def GetDisk(self, disk_name, zone):
        """Get disk information.

        Args:
            disk_name: A string.
            zone: String, name of zone.

        Returns:
            A disk resource in json.
            https://cloud.google.com/compute/docs/reference/latest/disks#resource
        """
        api = self.service.disks().get(project=self._project,
                                       zone=zone,
                                       disk=disk_name)
        return self.Execute(api)

    def CheckDiskExists(self, disk_name, zone):
        """Check if disk exists.

        Args:
            disk_name: A string
            zone: String, name of zone.

        Returns:
            True if disk exists, otherwise False.
        """
        try:
            self.GetDisk(disk_name, zone)
            exists = True
        except errors.ResourceNotFoundError:
            exists = False
        logger.debug("CheckDiskExists: disk_name: %s, result: %s", disk_name,
                     exists)
        return exists

    def CreateDisk(self, disk_name, source_image, size_gb, zone):
        """Create a gce disk.

        If waiting on the creation fails, the partially created disk (if any)
        is deleted before the error is re-raised.

        Args:
            disk_name: A string
            source_image: A string, name of the image.
            size_gb: Integer, size in gb.
            zone: Name of the zone, e.g. us-central1-b.
        """
        logger.info("Creating disk %s, size_gb: %d", disk_name, size_gb)
        source_image = "projects/%s/global/images/%s" % (
            self._project, source_image) if source_image else None
        body = {
            "name": disk_name,
            "sizeGb": size_gb,
            "type": "projects/%s/zones/%s/diskTypes/pd-standard" % (
                self._project, zone),
        }
        api = self.service.disks().insert(project=self._project,
                                          sourceImage=source_image,
                                          zone=zone,
                                          body=body)
        operation = self.Execute(api)
        try:
            self.WaitOnOperation(operation=operation,
                                 operation_scope=OperationScope.ZONE,
                                 scope_name=zone)
        except errors.DriverError:
            logger.error("Creating disk failed, cleaning up: %s", disk_name)
            if self.CheckDiskExists(disk_name, zone):
                self.DeleteDisk(disk_name, zone)
            raise
        logger.info("Disk %s has been created.", disk_name)

    def DeleteDisk(self, disk_name, zone):
        """Delete a gce disk.

        Args:
            disk_name: A string, name of disk.
            zone: A string, name of zone.
        """
        logger.info("Deleting disk %s", disk_name)
        api = self.service.disks().delete(project=self._project,
                                          zone=zone,
                                          disk=disk_name)
        operation = self.Execute(api)
        # Disks are zonal resources, so the delete operation must be polled
        # through the zone operations API (same as CreateDisk).
        self.WaitOnOperation(operation=operation,
                             operation_scope=OperationScope.ZONE,
                             scope_name=zone)
        logger.info("Deleted disk %s", disk_name)

    def DeleteDisks(self, disk_names, zone):
        """Delete multiple disks.

        Args:
            disk_names: A list of disk names.
            zone: A string, name of zone.

        Returns:
            A tuple, (deleted, failed, error_msgs)
            deleted: A list of names of disks that have been deleted.
            failed: A list of names of disks that we fail to delete.
            error_msgs: A list of failure messages.
        """
        if not disk_names:
            logger.warning(
                "Nothing to delete. Arg disk_names is not provided.")
            return [], [], []
        # Batch send deletion requests.
        logger.info("Deleting disks: %s", disk_names)
        delete_requests = {}
        for disk_name in set(disk_names):
            request = self.service.disks().delete(project=self._project,
                                                  disk=disk_name,
                                                  zone=zone)
            delete_requests[disk_name] = request
        # Disk deletions are zonal operations.
        return self._BatchExecuteAndWait(delete_requests,
                                         OperationScope.ZONE,
                                         scope_name=zone)

    def ListDisks(self, zone, disk_filter=None):
        """List disks.

        Args:
            zone: A string, representing zone name. e.g. "us-central1-f"
            disk_filter: A string representing a filter in format of
                             FIELD_NAME COMPARISON_STRING LITERAL_STRING
                             e.g. "name ne example-instance"
                             e.g. "name eq "example-instance-[0-9]+""

        Returns:
            A list of disks.
        """
        return self.ListWithMultiPages(api_resource=self.service.disks().list,
                                       project=self._project,
                                       zone=zone,
                                       filter=disk_filter)

    def CreateImage(self, image_name, source_uri):
        """Create a Gce image.

        If waiting on the creation fails, the partially created image (if
        any) is deleted before the error is re-raised.

        Args:
            image_name: A string
            source_uri: Full Google Cloud Storage URL where the disk image is
                        stored. e.g. "https://storage.googleapis.com/my-bucket/
                        avd-system-2243663.tar.gz"
        """
        logger.info("Creating image %s, source_uri %s", image_name, source_uri)
        body = {"name": image_name, "rawDisk": {"source": source_uri, }, }
        api = self.service.images().insert(project=self._project, body=body)
        operation = self.Execute(api)
        try:
            self.WaitOnOperation(operation=operation,
                                 operation_scope=OperationScope.GLOBAL)
        except errors.DriverError:
            logger.error("Creating image failed, cleaning up: %s", image_name)
            if self.CheckImageExists(image_name):
                self.DeleteImage(image_name)
            raise
        logger.info("Image %s has been created.", image_name)

    def CheckImageExists(self, image_name):
        """Check if image exists.

        Args:
            image_name: A string

        Returns:
            True if image exists, otherwise False.
        """
        try:
            self.GetImage(image_name)
            exists = True
        except errors.ResourceNotFoundError:
            exists = False
        logger.debug("CheckImageExists: image_name: %s, result: %s",
                     image_name, exists)
        return exists

    def GetImage(self, image_name):
        """Get image information.

        Args:
            image_name: A string

        Returns:
            An image resource in json.
            https://cloud.google.com/compute/docs/reference/latest/images#resource
        """
        api = self.service.images().get(project=self._project,
                                        image=image_name)
        return self.Execute(api)

    def DeleteImage(self, image_name):
        """Delete an image.

        Args:
            image_name: A string
        """
        logger.info("Deleting image %s", image_name)
        api = self.service.images().delete(project=self._project,
                                           image=image_name)
        operation = self.Execute(api)
        self.WaitOnOperation(operation=operation,
                             operation_scope=OperationScope.GLOBAL)
        logger.info("Deleted image %s", image_name)

    def DeleteImages(self, image_names):
        """Delete multiple images.

        Args:
            image_names: A list of image names.

        Returns:
            A tuple, (deleted, failed, error_msgs)
            deleted: A list of names of images that have been deleted.
            failed: A list of names of images that we fail to delete.
            error_msgs: A list of failure messages.
        """
        if not image_names:
            return [], [], []
        # Batch send deletion requests.
        logger.info("Deleting images: %s", image_names)
        delete_requests = {}
        for image_name in set(image_names):
            request = self.service.images().delete(project=self._project,
                                                   image=image_name)
            delete_requests[image_name] = request
        return self._BatchExecuteAndWait(delete_requests,
                                         OperationScope.GLOBAL)

    def ListImages(self, image_filter=None):
        """List images.

        Args:
            image_filter: A string representing a filter in format of
                          FIELD_NAME COMPARISON_STRING LITERAL_STRING
                          e.g. "name ne example-image"
                          e.g. "name eq "example-image-[0-9]+""

        Returns:
            A list of images.
        """
        return self.ListWithMultiPages(api_resource=self.service.images().list,
                                       project=self._project,
                                       filter=image_filter)

    def GetInstance(self, instance, zone):
        """Get information about an instance.

        Args:
            instance: A string, representing instance name.
            zone: A string, representing zone name. e.g. "us-central1-f"

        Returns:
            An instance resource in json.
            https://cloud.google.com/compute/docs/reference/latest/instances#resource
        """
        api = self.service.instances().get(project=self._project,
                                           zone=zone,
                                           instance=instance)
        return self.Execute(api)

    def StartInstance(self, instance, zone):
        """Start |instance| in |zone|.

        Args:
            instance: A string, representing instance name.
            zone: A string, representing zone name. e.g. "us-central1-f"

        Raises:
            errors.GceOperationTimeoutError: Operation takes too long to finish.
        """
        api = self.service.instances().start(project=self._project,
                                             zone=zone,
                                             instance=instance)
        operation = self.Execute(api)
        try:
            self.WaitOnOperation(operation=operation,
                                 operation_scope=OperationScope.ZONE,
                                 scope_name=zone)
        except errors.GceOperationTimeoutError:
            logger.error("Start instance failed: %s", instance)
            raise
        logger.info("Instance %s has been started.", instance)

    def StartInstances(self, instances, zone):
        """Start |instances| in |zone|.

        Args:
            instances: A list of strings, representing instance names's list.
            zone: A string, representing zone name. e.g. "us-central1-f"

        Returns:
            A tuple, (done, failed, error_msgs)
            done: A list of string, representing the names of instances that
              have been executed.
            failed: A list of string, representing the names of instances that
              we failed to execute.
            error_msgs: A list of string, representing the failure messages.
        """
        action = functools.partial(self.service.instances().start,
                                   project=self._project,
                                   zone=zone)
        return self._BatchExecuteOnInstances(instances, zone, action)

    def StopInstance(self, instance, zone):
        """Stop |instance| in |zone|.

        Args:
            instance: A string, representing instance name.
            zone: A string, representing zone name. e.g. "us-central1-f"

        Raises:
            errors.GceOperationTimeoutError: Operation takes too long to finish.
        """
        api = self.service.instances().stop(project=self._project,
                                            zone=zone,
                                            instance=instance)
        operation = self.Execute(api)
        try:
            self.WaitOnOperation(operation=operation,
                                 operation_scope=OperationScope.ZONE,
                                 scope_name=zone)
        except errors.GceOperationTimeoutError:
            logger.error("Stop instance failed: %s", instance)
            raise
        logger.info("Instance %s has been terminated.", instance)

    def StopInstances(self, instances, zone):
        """Stop |instances| in |zone|.

        Args:
            instances: A list of strings, representing instance names's list.
            zone: A string, representing zone name. e.g. "us-central1-f"

        Returns:
            A tuple, (done, failed, error_msgs)
            done: A list of string, representing the names of instances that
                  have been executed.
            failed: A list of string, representing the names of instances that
                    we failed to execute.
            error_msgs: A list of string, representing the failure messages.
        """
        action = functools.partial(self.service.instances().stop,
                                   project=self._project,
                                   zone=zone)
        return self._BatchExecuteOnInstances(instances, zone, action)

    def SetScheduling(self,
                      instance,
                      zone,
                      automatic_restart=True,
                      on_host_maintenance="MIGRATE"):
        """Update scheduling config |automatic_restart| and |on_host_maintenance|.

        See //cloud/cluster/api/mixer_instances.proto Scheduling for config option.

        Args:
            instance: A string, representing instance name.
            zone: A string, representing zone name. e.g. "us-central1-f".
            automatic_restart: Boolean, determine whether the instance will
                               automatically restart if it crashes or not,
                               default to True.
            on_host_maintenance: enum["MIGRATE", "TERMINATE"]
                                 The instance's maintenance behavior, which
                                 determines whether the instance is live
                                 migrated ("MIGRATE") or terminated
                                 ("TERMINATE") when there is a maintenance
                                 event.

        Raises:
            errors.GceOperationTimeoutError: Operation takes too long to finish.
        """
        # Field names follow the Scheduling resource: lower camel case.
        body = {"automaticRestart": automatic_restart,
                "onHostMaintenance": on_host_maintenance}
        api = self.service.instances().setScheduling(project=self._project,
                                                     zone=zone,
                                                     instance=instance,
                                                     body=body)
        operation = self.Execute(api)
        try:
            self.WaitOnOperation(operation=operation,
                                 operation_scope=OperationScope.ZONE,
                                 scope_name=zone)
        except errors.GceOperationTimeoutError:
            logger.error("Set instance scheduling failed: %s", instance)
            raise
        logger.info("Instance scheduling changed:\n"
                    "    automaticRestart: %s\n"
                    "    onHostMaintenance: %s\n",
                    str(automatic_restart).lower(), on_host_maintenance)

    def ListInstances(self, zone, instance_filter=None):
        """List instances.

        Args:
            zone: A string, representing zone name. e.g. "us-central1-f"
            instance_filter: A string representing a filter in format of
                             FIELD_NAME COMPARISON_STRING LITERAL_STRING
                             e.g. "name ne example-instance"
                             e.g. "name eq "example-instance-[0-9]+""

        Returns:
            A list of instances.
        """
        return self.ListWithMultiPages(
            api_resource=self.service.instances().list,
            project=self._project,
            zone=zone,
            filter=instance_filter)

    def SetSchedulingInstances(self,
                               instances,
                               zone,
                               automatic_restart=True,
                               on_host_maintenance="MIGRATE"):
        """Update scheduling config |automatic_restart| and |on_host_maintenance|.

        See //cloud/cluster/api/mixer_instances.proto Scheduling for config option.

        Args:
            instances: A list of string, representing instance names.
            zone: A string, representing zone name. e.g. "us-central1-f".
            automatic_restart: Boolean, determine whether the instance will
                               automatically restart if it crashes or not,
                               default to True.
            on_host_maintenance: enum["MIGRATE", "TERMINATE"]
                                 The instance's maintenance behavior, which
                                 determines whether the instance is live
                                 migrated ("MIGRATE") or terminated
                                 ("TERMINATE") when there is a maintenance
                                 event.

        Returns:
            A tuple, (done, failed, error_msgs)
            done: A list of string, representing the names of instances that
                  have been executed.
            failed: A list of string, representing the names of instances that
                    we failed to execute.
            error_msgs: A list of string, representing the failure messages.
        """
        # Field names follow the Scheduling resource: lower camel case.
        body = {"automaticRestart": automatic_restart,
                "onHostMaintenance": on_host_maintenance}
        action = functools.partial(self.service.instances().setScheduling,
                                   project=self._project,
                                   zone=zone,
                                   body=body)
        return self._BatchExecuteOnInstances(instances, zone, action)

    def _BatchExecuteOnInstances(self, instances, zone, action):
        """Batch processing operations requiring computing time.

        Args:
            instances: A list of instance names.
            zone: A string, e.g. "us-central1-f".
            action: partial func, all kwargs for this gcloud action has been
                    defined in the caller function (e.g. See "StartInstances")
                    except 'instance' which will be defined by iterating the
                    |instances|.

        Returns:
            A tuple, (done, failed, error_msgs)
            done: A list of string, representing the names of instances that
                  have been executed.
            failed: A list of string, representing the names of instances that
                    we failed to execute.
            error_msgs: A list of string, representing the failure messages.
        """
        if not instances:
            return [], [], []
        # Batch send requests.
        logger.info("Batch executing instances: %s", instances)
        # De-duplicate names so each instance gets exactly one request.
        requests = {}
        for instance_name in set(instances):
            requests[instance_name] = action(instance=instance_name)
        return self._BatchExecuteAndWait(requests,
                                         operation_scope=OperationScope.ZONE,
                                         scope_name=zone)

    def _BatchExecuteAndWait(self, requests, operation_scope, scope_name=None):
        """Batch processing requests and wait on the operation.

        Args:
            requests: A dictionary. The key is a string representing the resource
                      name. For example, an instance name, or an image name.
            operation_scope: A value from OperationScope, "zone", "region",
                             or "global".
            scope_name: If operation_scope is "zone" or "region", this should be
                        the name of the zone or region, e.g. "us-central1-f".

        Returns:
            A tuple, (done, failed, error_msgs)
            done: A list of string, representing the resource names that have
                  been executed.
            failed: A list of string, representing resource names that
                    we failed to execute.
            error_msgs: A list of string, representing the failure messages.
        """
        results = self.BatchExecute(requests)
        # Initialize return values
        failed = []
        error_msgs = []
        # First pass: record the requests that were rejected outright.
        for resource_name, (_, error) in results.items():
            if error is not None:
                failed.append(resource_name)
                error_msgs.append(str(error))
        done = []
        # Wait for the executing operations to finish.
        logger.info("Waiting for executing operations")
        for resource_name in requests:
            operation, _ = results[resource_name]
            if operation:
                try:
                    self.WaitOnOperation(operation, operation_scope,
                                         scope_name)
                    done.append(resource_name)
                except errors.DriverError as exc:
                    failed.append(resource_name)
                    error_msgs.append(str(exc))
        return done, failed, error_msgs

    def ListZones(self):
        """List all zone instances in the project.

        Returns:
            Gcompute response instance. For example:
            {
              "id": "projects/google.com%3Aandroid-build-staging/zones",
              "kind": "compute#zoneList",
              "selfLink": "https://www.googleapis.com/compute/v1/projects/"
                  "google.com:android-build-staging/zones",
              "items": [
                {
                  'creationTimestamp': '2014-07-15T10:44:08.663-07:00',
                  'description': 'asia-east1-c',
                  'id': '2222',
                  'kind': 'compute#zone',
                  'name': 'asia-east1-c',
                  'region': 'https://www.googleapis.com/compute/v1/projects/'
                      'google.com:android-build-staging/regions/asia-east1',
                  'selfLink': 'https://www.googleapis.com/compute/v1/projects/'
                      'google.com:android-build-staging/zones/asia-east1-c',
                  'status': 'UP'
                }, {
                  'creationTimestamp': '2014-05-30T18:35:16.575-07:00',
                  'description': 'asia-east1-b',
                  'id': '2221',
                  'kind': 'compute#zone',
                  'name': 'asia-east1-b',
                  'region': 'https://www.googleapis.com/compute/v1/projects/'
                      'google.com:android-build-staging/regions/asia-east1',
                  'selfLink': 'https://www.googleapis.com/compute/v1/projects'
                      '/google.com:android-build-staging/zones/asia-east1-b',
                  'status': 'UP'
                }]
            }
            See cloud cluster's api/mixer_zones.proto
        """
        api = self.service.zones().list(project=self._project)
        return self.Execute(api)

    def _GetNetworkArgs(self, network):
        """Helper to generate network args that is used to create an instance.

        Args:
            network: A string, e.g. "default".

        Returns:
            A dictionary representing network args.
        """
        return {
            "network": self.GetNetworkUrl(network),
            "accessConfigs": [{"name": "External NAT",
                               "type": "ONE_TO_ONE_NAT"}]
        }

    def _GetDiskArgs(self, disk_name, image_name):
        """Helper to generate disk args that is used to create an instance.

        Args:
            disk_name: A string
            image_name: A string

        Returns:
            A single-element list holding a dictionary of disk args.
        """
        return [{
            "type": "PERSISTENT",
            "boot": True,
            "mode": "READ_WRITE",
            "autoDelete": True,
            "initializeParams": {
                "diskName": disk_name,
                "sourceImage": self.GetImage(image_name)["selfLink"],
            },
        }]

    def CreateInstance(self,
                       instance,
                       image_name,
                       machine_type,
                       metadata,
                       network,
                       zone,
                       disk_args=None):
        """Create a gce instance with a gce image.

        Args:
            instance: instance name.
            image_name: A source image used to create this disk.
            machine_type: A string, representing machine_type, e.g. "n1-standard-1"
            metadata: A dictionary that maps a metadata name to its value.
            network: A string, representing network name, e.g. "default"
            zone: A string, representing zone name, e.g. "us-central1-f"
            disk_args: A list of extra disk args, see _GetDiskArgs for example,
                       if None, will create a disk using the given image.
        """
        disk_args = (disk_args or self._GetDiskArgs(instance, image_name))
        body = {
            "machineType": self.GetMachineType(machine_type, zone)["selfLink"],
            "name": instance,
            "networkInterfaces": [self._GetNetworkArgs(network)],
            "disks": disk_args,
            "serviceAccounts": [
                {"email": "default",
                 "scopes": self.DEFAULT_INSTANCE_SCOPE}
            ],
        }

        if metadata:
            metadata_list = [{"key": key,
                              "value": val}
                             for key, val in metadata.items()]
            body["metadata"] = {"items": metadata_list}
        logger.info("Creating instance: project %s, zone %s, body:%s",
                    self._project, zone, body)
        api = self.service.instances().insert(project=self._project,
                                              zone=zone,
                                              body=body)
        operation = self.Execute(api)
        self.WaitOnOperation(operation,
                             operation_scope=OperationScope.ZONE,
                             scope_name=zone)
        logger.info("Instance %s has been created.", instance)

    def DeleteInstance(self, instance, zone):
        """Delete a gce instance.

        Args:
            instance: A string, instance name.
            zone: A string, e.g. "us-central1-f"
        """
        logger.info("Deleting instance: %s", instance)
        api = self.service.instances().delete(project=self._project,
                                              zone=zone,
                                              instance=instance)
        operation = self.Execute(api)
        self.WaitOnOperation(operation,
                             operation_scope=OperationScope.ZONE,
                             scope_name=zone)
        logger.info("Deleted instance: %s", instance)

    def DeleteInstances(self, instances, zone):
        """Delete multiple instances.

        Args:
            instances: A list of instance names.
            zone: A string, e.g. "us-central1-f".

        Returns:
            A tuple, (deleted, failed, error_msgs)
            deleted: A list of names of instances that have been deleted.
            failed: A list of names of instances that we fail to delete.
            error_msgs: A list of failure messages.
        """
        action = functools.partial(self.service.instances().delete,
                                   project=self._project,
                                   zone=zone)
        return self._BatchExecuteOnInstances(instances, zone, action)

    def ResetInstance(self, instance, zone):
        """Reset the gce instance.

        Args:
            instance: A string, instance name.
            zone: A string, e.g. "us-central1-f".
        """
        logger.info("Resetting instance: %s", instance)
        api = self.service.instances().reset(project=self._project,
                                             zone=zone,
                                             instance=instance)
        operation = self.Execute(api)
        self.WaitOnOperation(operation,
                             operation_scope=OperationScope.ZONE,
                             scope_name=zone)
        logger.info("Instance has been reset: %s", instance)

    def GetMachineType(self, machine_type, zone):
        """Get the resource for a given machine type.

        Args:
            machine_type: A string, name of the machine type.
            zone: A string, e.g. "us-central1-f"

        Returns:
            A machine type resource in json.
            https://cloud.google.com/compute/docs/reference/latest/
            machineTypes#resource
        """
        api = self.service.machineTypes().get(project=self._project,
                                              zone=zone,
                                              machineType=machine_type)
        return self.Execute(api)

    def GetNetworkUrl(self, network):
        """Get URL for a given network.

        Args:
            network: A string, representing network name, e.g "default"

        Returns:
            A URL that points to the network resource, e.g.
            https://www.googleapis.com/compute/v1/projects/<project id>/
            global/networks/default
        """
        api = self.service.networks().get(project=self._project,
                                          network=network)
        result = self.Execute(api)
        return result["selfLink"]

    def CompareMachineSize(self, machine_type_1, machine_type_2, zone):
        """Compare the size of two machine types.

        The metrics in MACHINE_SIZE_METRICS are compared in order; the first
        metric that differs decides the result.

        Args:
            machine_type_1: A string representing a machine type, e.g. n1-standard-1
            machine_type_2: A string representing a machine type, e.g. n1-standard-1
            zone: A string representing a zone, e.g. "us-central1-f"

        Returns:
            1 if size of the first type is greater than the second type.
            -1 if size of the first type is smaller than the second type.
            0 if they are equal.

        Raises:
            errors.DriverError: For malformed response.
        """
        machine_info_1 = self.GetMachineType(machine_type_1, zone)
        machine_info_2 = self.GetMachineType(machine_type_2, zone)
        for metric in self.MACHINE_SIZE_METRICS:
            if metric not in machine_info_1 or metric not in machine_info_2:
                raise errors.DriverError(
                    "Malformed machine size record: Can't find '%s' in %s or %s"
                    % (metric, machine_info_1, machine_info_2))
            difference = machine_info_1[metric] - machine_info_2[metric]
            if difference > 0:
                return 1
            elif difference < 0:
                return -1
        return 0

    def GetSerialPortOutput(self, instance, zone, port=1):
        """Get serial port output.

        Args:
            instance: string, instance name.
            zone: string, zone name.
            port: int, which COM port to read from, 1-4, default to 1.

        Returns:
            String, contents of the output.

        Raises:
            errors.DriverError: For malformed response.
        """
        api = self.service.instances().getSerialPortOutput(
            project=self._project,
            zone=zone,
            instance=instance,
            port=port)
        result = self.Execute(api)
        if "contents" not in result:
            raise errors.DriverError(
                "Malformed response for GetSerialPortOutput: %s" % result)
        return result["contents"]

    def GetInstanceNamesByIPs(self, ips, zone):
        """Get Instance names by IPs.

        This function will go through all instances, which
        could be slow if there are too many instances.  However, currently
        GCE doesn't support search for instance by IP.

        Args:
            ips: A set of IPs.
            zone: String, name of the zone.

        Returns:
            A dictionary where key is IP and value is instance name or None
            if instance is not found for the given IP.
        """
        ip_name_map = dict.fromkeys(ips)
        for instance in self.ListInstances(zone):
            try:
                ip = instance["networkInterfaces"][0]["accessConfigs"][0][
                    "natIP"]
                if ip in ips:
                    ip_name_map[ip] = instance["name"]
            except (IndexError, KeyError) as e:
                # Instances without the expected network fields are logged
                # and skipped rather than aborting the whole lookup.
                logger.error("Could not get instance names by ips: %s", str(e))
        return ip_name_map

    def GetInstanceIP(self, instance, zone):
        """Get Instance IP given instance name.

        Args:
            instance: String, representing instance name.
            zone: String, name of the zone.

        Returns:
            string, IP of the instance.
        """
        # TODO(fdeng): This is for accessing external IP.
        # We should handle internal IP as well when the script is running
        # on a GCE instance in the same network of |instance|.
        instance = self.GetInstance(instance, zone)
        return instance["networkInterfaces"][0]["accessConfigs"][0]["natIP"]

    def SetCommonInstanceMetadata(self, body):
        """Set project-wide metadata.

        Args:
            body: Metadata body.
                  metadata is in the following format.
                  {
                    "kind": "compute#metadata",
                    "fingerprint": "a-23icsyx4E=",
                    "items": [
                      {
                        "key": "google-compute-default-region",
                        "value": "us-central1"
                      }, ...
                    ]
                  }
        """
        api = self.service.projects().setCommonInstanceMetadata(
            project=self._project, body=body)
        operation = self.Execute(api)
        self.WaitOnOperation(operation, operation_scope=OperationScope.GLOBAL)

    def AddSshRsa(self, user, ssh_rsa_path):
        """Add the public rsa key to the project's metadata.

        Compute engine instances that are created after will
        by default contain the key.

        Args:
            user: the name of the user which the key belongs to.
            ssh_rsa_path: The absolute path to public rsa key.

        Raises:
            errors.DriverError: if |ssh_rsa_path| does not exist.
        """
        if not os.path.exists(ssh_rsa_path):
            raise errors.DriverError("RSA file %s does not exist." %
                                     ssh_rsa_path)

        logger.info("Adding ssh rsa key from %s to project %s for user: %s",
                    ssh_rsa_path, self._project, user)
        project = self.GetProject()
        with open(ssh_rsa_path) as f:
            rsa = f.read()
            rsa = rsa.strip() if rsa else rsa
            utils.VerifyRsaPubKey(rsa)
        metadata = project["commonInstanceMetadata"]
        # Reuse the existing "sshKeys" item if present; otherwise (for/else)
        # create an empty one and attach it to the metadata.
        for item in metadata.setdefault("items", []):
            if item["key"] == "sshKeys":
                sshkey_item = item
                break
        else:
            sshkey_item = {"key": "sshKeys", "value": ""}
            metadata["items"].append(sshkey_item)

        entry = "%s:%s" % (user, rsa)
        logger.debug("New RSA entry: %s", entry)
        # Append the new entry on its own line; the outer strip() drops the
        # leading newline when there were no keys before.
        sshkey_item["value"] = "\n".join([sshkey_item["value"].strip(), entry
                                          ]).strip()
        self.SetCommonInstanceMetadata(metadata)

    def CheckAccess(self):
        """Check if the user has read access to the cloud project.

        Returns:
            True if the user has at least read access to the project.
            False otherwise.

        Raises:
            errors.HttpError if other unexpected error happens when
            accessing the project.
        """
        api = self.service.zones().list(project=self._project)
        # Do not retry on access denied: that is the answer we are probing
        # for. Guard the removal so a retry list without that code does not
        # raise ValueError.
        retry_http_codes = copy.copy(self.RETRY_HTTP_CODES)
        if self.ACCESS_DENIED_CODE in retry_http_codes:
            retry_http_codes.remove(self.ACCESS_DENIED_CODE)
        try:
            self.Execute(api, retry_http_codes=retry_http_codes)
        except errors.HttpError as e:
            if e.code == self.ACCESS_DENIED_CODE:
                return False
            raise
        return True