1#!/usr/bin/env python
2#
3# Copyright 2016 - The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""A client that manages Google Compute Engine.
17
18** ComputeClient **
19
20ComputeClient is a wrapper around Google Compute Engine APIs.
21It provides a set of methods for managing a google compute engine project,
22such as creating images, creating instances, etc.
23
24Design philosophy: We tried to make ComputeClient as stateless as possible,
25and it only keeps states about authentication. ComputeClient should be very
26generic, and only knows how to talk to Compute Engine APIs.
27"""
28# pylint: disable=too-many-lines
29import copy
30import functools
31import getpass
32import logging
33import os
34import re
35
36import six
37
38from acloud import errors
39from acloud.internal import constants
40from acloud.internal.lib import base_cloud_client
41from acloud.internal.lib import utils
42from acloud.internal.lib.ssh import IP
43
44
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Retry budget handed to utils.RetryOnException for HTTP 412 fingerprint
# conflicts (see SetImageLabels).
_MAX_RETRIES_ON_FINGERPRINT_CONFLICT = 10
# Dictionary key names; presumably used when reading/writing GCE metadata
# entries further down the file -- their use is not visible in this part.
_METADATA_KEY = "key"
_METADATA_KEY_VALUE = "value"
_SSH_KEYS_NAME = "sshKeys"
_ITEMS = "items"
_METADATA = "metadata"
# Matches a string that starts with "zones/" and captures the remainder in the
# named group "zone", e.g. "zones/asia-east1-b" -> "asia-east1-b".
_ZONE_RE = re.compile(r"^zones/(?P<zone>.+)")

# Template for a disk entry: a persistent, read-write boot disk that is
# auto-deleted, with empty initializeParams.
# NOTE(review): this is a shared mutable dict; presumably callers copy it
# before filling in "initializeParams" -- confirm at the call sites.
BASE_DISK_ARGS = {
    "type": "PERSISTENT",
    "boot": True,
    "mode": "READ_WRITE",
    "autoDelete": True,
    "initializeParams": {},
}
62
63
class OperationScope(object):
    """String constants naming the scope an operation belongs to.

    The value selects which operations endpoint is polled: zone-scoped,
    region-scoped, or project-global.
    """
    # Operation tied to a single zone, e.g. "us-central1-f".
    ZONE = "zone"
    # Operation tied to a region.
    REGION = "region"
    # Operation that is not tied to any zone or region.
    GLOBAL = "global"
69
70
class PersistentDiskType(object):
    """String constants for the supported persistent disk types.

    STANDARD ("pd-standard") is a regular hard disk.
    SSD ("pd-ssd") is a solid state disk.
    """
    # Regular (spinning) persistent disk.
    STANDARD = "pd-standard"
    # Solid state persistent disk.
    SSD = "pd-ssd"
79
80
class ImageStatus(object):
    """String constants for the possible states of an image."""
    # The image is still being created.
    PENDING = "PENDING"
    # The image is usable.
    READY = "READY"
    # Image creation did not succeed.
    FAILED = "FAILED"
86
87
def _IsFingerPrintError(exc):
    """Tell whether an exception is an HTTP 412 ("Precondition Failed") error.

    Args:
        exc: Exception instance to inspect.

    Returns:
        Boolean. True only when |exc| is an errors.HttpError whose code is 412.
    """
    # Anything that is not an HttpError cannot be a fingerprint conflict.
    if not isinstance(exc, errors.HttpError):
        return False
    return exc.code == 412
98
99
100# pylint: disable=too-many-public-methods
101class ComputeClient(base_cloud_client.BaseCloudApiClient):
102    """Client that manages GCE."""
103
104    # API settings, used by BaseCloudApiClient.
105    API_NAME = "compute"
106    API_VERSION = "v1"
107    SCOPE = " ".join([
108        "https://www.googleapis.com/auth/compute",
109        "https://www.googleapis.com/auth/devstorage.read_write"
110    ])
111    # Default settings for gce operations
112    DEFAULT_INSTANCE_SCOPE = [
113        "https://www.googleapis.com/auth/androidbuild.internal",
114        "https://www.googleapis.com/auth/devstorage.read_only",
115        "https://www.googleapis.com/auth/logging.write"
116    ]
117    OPERATION_TIMEOUT_SECS = 30 * 60  # 30 mins
118    OPERATION_POLL_INTERVAL_SECS = 20
119    MACHINE_SIZE_METRICS = ["guestCpus", "memoryMb"]
120    ACCESS_DENIED_CODE = 403
121
122    def __init__(self, acloud_config, oauth2_credentials):
123        """Initialize.
124
125        Args:
126            acloud_config: An AcloudConfig object.
127            oauth2_credentials: An oauth2client.OAuth2Credentials instance.
128        """
129        super(ComputeClient, self).__init__(oauth2_credentials)
130        self._project = acloud_config.project
131
132    def _GetOperationStatus(self, operation, operation_scope, scope_name=None):
133        """Get status of an operation.
134
135        Args:
136            operation: An Operation resource in the format of json.
137            operation_scope: A value from OperationScope, "zone", "region",
138                             or "global".
139            scope_name: If operation_scope is "zone" or "region", this should be
140                        the name of the zone or region, e.g. "us-central1-f".
141
142        Returns:
143            Status of the operation, one of "DONE", "PENDING", "RUNNING".
144
145        Raises:
146            errors.DriverError: if the operation fails.
147        """
148        operation_name = operation["name"]
149        if operation_scope == OperationScope.GLOBAL:
150            api = self.service.globalOperations().get(
151                project=self._project, operation=operation_name)
152            result = self.Execute(api)
153        elif operation_scope == OperationScope.ZONE:
154            api = self.service.zoneOperations().get(
155                project=self._project,
156                operation=operation_name,
157                zone=scope_name)
158            result = self.Execute(api)
159        elif operation_scope == OperationScope.REGION:
160            api = self.service.regionOperations().get(
161                project=self._project,
162                operation=operation_name,
163                region=scope_name)
164            result = self.Execute(api)
165
166        if result.get("error"):
167            errors_list = result["error"]["errors"]
168            raise errors.DriverError(
169                "Get operation state failed, errors: %s" % str(errors_list))
170        return result["status"]
171
172    def WaitOnOperation(self, operation, operation_scope, scope_name=None):
173        """Wait for an operation to finish.
174
175        Args:
176            operation: An Operation resource in the format of json.
177            operation_scope: A value from OperationScope, "zone", "region",
178                             or "global".
179            scope_name: If operation_scope is "zone" or "region", this should be
180                        the name of the zone or region, e.g. "us-central1-f".
181        """
182        timeout_exception = errors.GceOperationTimeoutError(
183            "Operation hits timeout, did not complete within %d secs." %
184            self.OPERATION_TIMEOUT_SECS)
185        utils.PollAndWait(
186            func=self._GetOperationStatus,
187            expected_return="DONE",
188            timeout_exception=timeout_exception,
189            timeout_secs=self.OPERATION_TIMEOUT_SECS,
190            sleep_interval_secs=self.OPERATION_POLL_INTERVAL_SECS,
191            operation=operation,
192            operation_scope=operation_scope,
193            scope_name=scope_name)
194
195    def GetProject(self):
196        """Get project information.
197
198        Returns:
199          A project resource in json.
200        """
201        api = self.service.projects().get(project=self._project)
202        return self.Execute(api)
203
204    def GetDisk(self, disk_name, zone):
205        """Get disk information.
206
207        Args:
208          disk_name: A string.
209          zone: String, name of zone.
210
211        Returns:
212          An disk resource in json.
213          https://cloud.google.com/compute/docs/reference/latest/disks#resource
214        """
215        api = self.service.disks().get(
216            project=self._project, zone=zone, disk=disk_name)
217        return self.Execute(api)
218
219    def CheckDiskExists(self, disk_name, zone):
220        """Check if disk exists.
221
222        Args:
223          disk_name: A string
224          zone: String, name of zone.
225
226        Returns:
227          True if disk exists, otherwise False.
228        """
229        try:
230            self.GetDisk(disk_name, zone)
231            exists = True
232        except errors.ResourceNotFoundError:
233            exists = False
234        logger.debug("CheckDiskExists: disk_name: %s, result: %s", disk_name,
235                     exists)
236        return exists
237
238    def CreateDisk(self,
239                   disk_name,
240                   source_image,
241                   size_gb,
242                   zone,
243                   source_project=None,
244                   disk_type=PersistentDiskType.STANDARD):
245        """Create a gce disk.
246
247        Args:
248            disk_name: String
249            source_image: String, name of the image.
250            size_gb: Integer, size in gb.
251            zone: String, name of the zone, e.g. us-central1-b.
252            source_project: String, required if the image is located in a different
253                            project.
254            disk_type: String, a value from PersistentDiskType, STANDARD
255                       for regular hard disk or SSD for solid state disk.
256        """
257        source_project = source_project or self._project
258        source_image = "projects/%s/global/images/%s" % (
259            source_project, source_image) if source_image else None
260        logger.info("Creating disk %s, size_gb: %d, source_image: %s",
261                    disk_name, size_gb, str(source_image))
262        body = {
263            "name": disk_name,
264            "sizeGb": size_gb,
265            "type": "projects/%s/zones/%s/diskTypes/%s" % (self._project, zone,
266                                                           disk_type),
267        }
268        api = self.service.disks().insert(
269            project=self._project,
270            sourceImage=source_image,
271            zone=zone,
272            body=body)
273        operation = self.Execute(api)
274        try:
275            self.WaitOnOperation(
276                operation=operation,
277                operation_scope=OperationScope.ZONE,
278                scope_name=zone)
279        except errors.DriverError:
280            logger.error("Creating disk failed, cleaning up: %s", disk_name)
281            if self.CheckDiskExists(disk_name, zone):
282                self.DeleteDisk(disk_name, zone)
283            raise
284        logger.info("Disk %s has been created.", disk_name)
285
286    def DeleteDisk(self, disk_name, zone):
287        """Delete a gce disk.
288
289        Args:
290            disk_name: A string, name of disk.
291            zone: A string, name of zone.
292        """
293        logger.info("Deleting disk %s", disk_name)
294        api = self.service.disks().delete(
295            project=self._project, zone=zone, disk=disk_name)
296        operation = self.Execute(api)
297        self.WaitOnOperation(
298            operation=operation,
299            operation_scope=OperationScope.ZONE,
300            scope_name=zone)
301        logger.info("Deleted disk %s", disk_name)
302
303    def DeleteDisks(self, disk_names, zone):
304        """Delete multiple disks.
305
306        Args:
307            disk_names: A list of disk names.
308            zone: A string, name of zone.
309
310        Returns:
311            A tuple, (deleted, failed, error_msgs)
312            deleted: A list of names of disks that have been deleted.
313            failed: A list of names of disks that we fail to delete.
314            error_msgs: A list of failure messages.
315        """
316        if not disk_names:
317            logger.warning("Nothing to delete. Arg disk_names is not provided.")
318            return [], [], []
319        # Batch send deletion requests.
320        logger.info("Deleting disks: %s", disk_names)
321        delete_requests = {}
322        for disk_name in set(disk_names):
323            request = self.service.disks().delete(
324                project=self._project, disk=disk_name, zone=zone)
325            delete_requests[disk_name] = request
326        return self._BatchExecuteAndWait(
327            delete_requests, OperationScope.ZONE, scope_name=zone)
328
329    def ListDisks(self, zone, disk_filter=None):
330        """List disks.
331
332        Args:
333            zone: A string, representing zone name. e.g. "us-central1-f"
334            disk_filter: A string representing a filter in format of
335                             FIELD_NAME COMPARISON_STRING LITERAL_STRING
336                             e.g. "name ne example-instance"
337                             e.g. "name eq "example-instance-[0-9]+""
338
339        Returns:
340            A list of disks.
341        """
342        return self.ListWithMultiPages(
343            api_resource=self.service.disks().list,
344            project=self._project,
345            zone=zone,
346            filter=disk_filter)
347
348    def CreateImage(self,
349                    image_name,
350                    source_uri=None,
351                    source_disk=None,
352                    labels=None):
353        """Create a Gce image.
354
355        Args:
356            image_name: String, name of image
357            source_uri: Full Google Cloud Storage URL where the disk image is
358                        stored.  e.g. "https://storage.googleapis.com/my-bucket/
359                        avd-system-2243663.tar.gz"
360            source_disk: String, this should be the disk's selfLink value
361                         (including zone and project), rather than the disk_name
362                         e.g. https://www.googleapis.com/compute/v1/projects/
363                              google.com:android-builds-project/zones/
364                              us-east1-d/disks/<disk_name>
365            labels: Dict, will be added to the image's labels.
366
367        Raises:
368            errors.DriverError: For malformed request or response.
369            errors.GceOperationTimeoutError: Operation takes too long to finish.
370        """
371        if self.CheckImageExists(image_name):
372            return
373        if (source_uri and source_disk) or (not source_uri
374                                            and not source_disk):
375            raise errors.DriverError(
376                "Creating image %s requires either source_uri %s or "
377                "source_disk %s but not both" % (image_name, source_uri,
378                                                 source_disk))
379        elif source_uri:
380            logger.info("Creating image %s, source_uri %s", image_name,
381                        source_uri)
382            body = {
383                "name": image_name,
384                "rawDisk": {
385                    "source": source_uri,
386                },
387            }
388        else:
389            logger.info("Creating image %s, source_disk %s", image_name,
390                        source_disk)
391            body = {
392                "name": image_name,
393                "sourceDisk": source_disk,
394            }
395        if labels is not None:
396            body["labels"] = labels
397        api = self.service.images().insert(project=self._project, body=body)
398        operation = self.Execute(api)
399        try:
400            self.WaitOnOperation(
401                operation=operation, operation_scope=OperationScope.GLOBAL)
402        except errors.DriverError:
403            logger.error("Creating image failed, cleaning up: %s", image_name)
404            if self.CheckImageExists(image_name):
405                self.DeleteImage(image_name)
406            raise
407        logger.info("Image %s has been created.", image_name)
408
409    @utils.RetryOnException(_IsFingerPrintError,
410                            _MAX_RETRIES_ON_FINGERPRINT_CONFLICT)
411    def SetImageLabels(self, image_name, new_labels):
412        """Update image's labels. Retry for finger print conflict.
413
414        Note: Decorator RetryOnException will retry the call for FingerPrint
415          conflict (HTTP error code 412). The fingerprint is used to detect
416          conflicts of GCE resource updates. The fingerprint is initially generated
417          by Compute Engine and changes after every request to modify or update
418          resources (e.g. GCE "image" resource has "fingerPrint" for "labels"
419          updates).
420
421        Args:
422            image_name: A string, the image name.
423            new_labels: Dict, will be added to the image's labels.
424
425        Returns:
426            A GlobalOperation resouce.
427            https://cloud.google.com/compute/docs/reference/latest/globalOperations
428        """
429        image = self.GetImage(image_name)
430        labels = image.get("labels", {})
431        labels.update(new_labels)
432        body = {
433            "labels": labels,
434            "labelFingerprint": image["labelFingerprint"]
435        }
436        api = self.service.images().setLabels(
437            project=self._project, resource=image_name, body=body)
438        return self.Execute(api)
439
440    def CheckImageExists(self, image_name):
441        """Check if image exists.
442
443        Args:
444            image_name: A string
445
446        Returns:
447            True if image exists, otherwise False.
448        """
449        try:
450            self.GetImage(image_name)
451            exists = True
452        except errors.ResourceNotFoundError:
453            exists = False
454        logger.debug("CheckImageExists: image_name: %s, result: %s",
455                     image_name, exists)
456        return exists
457
458    def GetImage(self, image_name, image_project=None):
459        """Get image information.
460
461        Args:
462            image_name: A string
463            image_project: A string
464
465        Returns:
466            An image resource in json.
467            https://cloud.google.com/compute/docs/reference/latest/images#resource
468        """
469        api = self.service.images().get(
470            project=image_project or self._project, image=image_name)
471        return self.Execute(api)
472
473    def DeleteImage(self, image_name):
474        """Delete an image.
475
476        Args:
477            image_name: A string
478        """
479        logger.info("Deleting image %s", image_name)
480        api = self.service.images().delete(
481            project=self._project, image=image_name)
482        operation = self.Execute(api)
483        self.WaitOnOperation(
484            operation=operation, operation_scope=OperationScope.GLOBAL)
485        logger.info("Deleted image %s", image_name)
486
487    def DeleteImages(self, image_names):
488        """Delete multiple images.
489
490        Args:
491            image_names: A list of image names.
492
493        Returns:
494            A tuple, (deleted, failed, error_msgs)
495            deleted: A list of names of images that have been deleted.
496            failed: A list of names of images that we fail to delete.
497            error_msgs: A list of failure messages.
498        """
499        if not image_names:
500            return [], [], []
501        # Batch send deletion requests.
502        logger.info("Deleting images: %s", image_names)
503        delete_requests = {}
504        for image_name in set(image_names):
505            request = self.service.images().delete(
506                project=self._project, image=image_name)
507            delete_requests[image_name] = request
508        return self._BatchExecuteAndWait(delete_requests,
509                                         OperationScope.GLOBAL)
510
511    def ListImages(self, image_filter=None, image_project=None):
512        """List images.
513
514        Args:
515            image_filter: A string representing a filter in format of
516                          FIELD_NAME COMPARISON_STRING LITERAL_STRING
517                          e.g. "name ne example-image"
518                          e.g. "name eq "example-image-[0-9]+""
519            image_project: String. If not provided, will list images from the default
520                           project. Otherwise, will list images from the given
521                           project, which can be any arbitrary project where the
522                           account has read access
523                           (i.e. has the role "roles/compute.imageUser")
524
525        Read more about image sharing across project:
526        https://cloud.google.com/compute/docs/images/sharing-images-across-projects
527
528        Returns:
529            A list of images.
530        """
531        return self.ListWithMultiPages(
532            api_resource=self.service.images().list,
533            project=image_project or self._project,
534            filter=image_filter)
535
536    def GetInstance(self, instance, zone):
537        """Get information about an instance.
538
539        Args:
540            instance: A string, representing instance name.
541            zone: A string, representing zone name. e.g. "us-central1-f"
542
543        Returns:
544            An instance resource in json.
545            https://cloud.google.com/compute/docs/reference/latest/instances#resource
546        """
547        api = self.service.instances().get(
548            project=self._project, zone=zone, instance=instance)
549        return self.Execute(api)
550
551    def AttachAccelerator(self, instance, zone, accelerator_count,
552                          accelerator_type):
553        """Attach a GPU accelerator to the instance.
554
555        Note: In order for this to succeed the following must hold:
556        - The machine schedule must be set to "terminate" i.e:
557          SetScheduling(self, instance, zone, on_host_maintenance="terminate")
558          must have been called.
559        - The machine is not starting or running. i.e.
560          StopInstance(self, instance) must have been called.
561
562        Args:
563            instance: A string, representing instance name.
564            zone: String, name of zone.
565            accelerator_count: The number accelerators to be attached to the instance.
566             a value of 0 will detach all accelerators.
567            accelerator_type: The type of accelerator to attach. e.g.
568              "nvidia-tesla-k80"
569        """
570        body = {
571            "guestAccelerators": [{
572                "acceleratorType":
573                self.GetAcceleratorUrl(accelerator_type, zone),
574                "acceleratorCount":
575                accelerator_count
576            }]
577        }
578        api = self.service.instances().setMachineResources(
579            project=self._project, zone=zone, instance=instance, body=body)
580        operation = self.Execute(api)
581        try:
582            self.WaitOnOperation(
583                operation=operation,
584                operation_scope=OperationScope.ZONE,
585                scope_name=zone)
586        except errors.GceOperationTimeoutError:
587            logger.error("Attach instance failed: %s", instance)
588            raise
589        logger.info("%d x %s have been attached to instance %s.",
590                    accelerator_count, accelerator_type, instance)
591
592    def AttachDisk(self, instance, zone, **kwargs):
593        """Attach the external disk to the instance.
594
595        Args:
596            instance: A string, representing instance name.
597            zone: String, name of zone.
598            **kwargs: The attachDisk request body. See "https://cloud.google.com/
599              compute/docs/reference/latest/instances/attachDisk" for detail.
600              {
601                "kind": "compute#attachedDisk",
602                "type": string,
603                "mode": string,
604                "source": string,
605                "deviceName": string,
606                "index": integer,
607                "boot": boolean,
608                "initializeParams": {
609                  "diskName": string,
610                  "sourceImage": string,
611                  "diskSizeGb": long,
612                  "diskType": string,
613                  "sourceImageEncryptionKey": {
614                    "rawKey": string,
615                    "sha256": string
616                  }
617                },
618                "autoDelete": boolean,
619                "licenses": [
620                  string
621                ],
622                "interface": string,
623                "diskEncryptionKey": {
624                  "rawKey": string,
625                  "sha256": string
626                }
627              }
628
629        Returns:
630            An disk resource in json.
631            https://cloud.google.com/compute/docs/reference/latest/disks#resource
632
633
634        Raises:
635            errors.GceOperationTimeoutError: Operation takes too long to finish.
636        """
637        api = self.service.instances().attachDisk(
638            project=self._project, zone=zone, instance=instance, body=kwargs)
639        operation = self.Execute(api)
640        try:
641            self.WaitOnOperation(
642                operation=operation,
643                operation_scope=OperationScope.ZONE,
644                scope_name=zone)
645        except errors.GceOperationTimeoutError:
646            logger.error("Attach instance failed: %s", instance)
647            raise
648        logger.info("Disk has been attached to instance %s.", instance)
649
650    def DetachDisk(self, instance, zone, disk_name):
651        """Attach the external disk to the instance.
652
653        Args:
654            instance: A string, representing instance name.
655            zone: String, name of zone.
656            disk_name: A string, the name of the detach disk.
657
658        Returns:
659            A ZoneOperation resource.
660            See https://cloud.google.com/compute/docs/reference/latest/zoneOperations
661
662        Raises:
663            errors.GceOperationTimeoutError: Operation takes too long to finish.
664        """
665        api = self.service.instances().detachDisk(
666            project=self._project,
667            zone=zone,
668            instance=instance,
669            deviceName=disk_name)
670        operation = self.Execute(api)
671        try:
672            self.WaitOnOperation(
673                operation=operation,
674                operation_scope=OperationScope.ZONE,
675                scope_name=zone)
676        except errors.GceOperationTimeoutError:
677            logger.error("Detach instance failed: %s", instance)
678            raise
679        logger.info("Disk has been detached to instance %s.", instance)
680
681    def StartInstance(self, instance, zone):
682        """Start |instance| in |zone|.
683
684        Args:
685            instance: A string, representing instance name.
686            zone: A string, representing zone name. e.g. "us-central1-f"
687
688        Raises:
689            errors.GceOperationTimeoutError: Operation takes too long to finish.
690        """
691        api = self.service.instances().start(
692            project=self._project, zone=zone, instance=instance)
693        operation = self.Execute(api)
694        try:
695            self.WaitOnOperation(
696                operation=operation,
697                operation_scope=OperationScope.ZONE,
698                scope_name=zone)
699        except errors.GceOperationTimeoutError:
700            logger.error("Start instance failed: %s", instance)
701            raise
702        logger.info("Instance %s has been started.", instance)
703
704    def StartInstances(self, instances, zone):
705        """Start |instances| in |zone|.
706
707        Args:
708            instances: A list of strings, representing instance names's list.
709            zone: A string, representing zone name. e.g. "us-central1-f"
710
711        Returns:
712            A tuple, (done, failed, error_msgs)
713            done: A list of string, representing the names of instances that
714              have been executed.
715            failed: A list of string, representing the names of instances that
716              we failed to execute.
717            error_msgs: A list of string, representing the failure messages.
718        """
719        action = functools.partial(
720            self.service.instances().start, project=self._project, zone=zone)
721        return self._BatchExecuteOnInstances(instances, zone, action)
722
723    def StopInstance(self, instance, zone):
724        """Stop |instance| in |zone|.
725
726        Args:
727            instance: A string, representing instance name.
728            zone: A string, representing zone name. e.g. "us-central1-f"
729
730        Raises:
731            errors.GceOperationTimeoutError: Operation takes too long to finish.
732        """
733        api = self.service.instances().stop(
734            project=self._project, zone=zone, instance=instance)
735        operation = self.Execute(api)
736        try:
737            self.WaitOnOperation(
738                operation=operation,
739                operation_scope=OperationScope.ZONE,
740                scope_name=zone)
741        except errors.GceOperationTimeoutError:
742            logger.error("Stop instance failed: %s", instance)
743            raise
744        logger.info("Instance %s has been terminated.", instance)
745
746    def StopInstances(self, instances, zone):
747        """Stop |instances| in |zone|.
748
749        Args:
750            instances: A list of strings, representing instance names's list.
751            zone: A string, representing zone name. e.g. "us-central1-f"
752
753        Returns:
754            A tuple, (done, failed, error_msgs)
755            done: A list of string, representing the names of instances that
756                  have been executed.
757            failed: A list of string, representing the names of instances that
758                    we failed to execute.
759            error_msgs: A list of string, representing the failure messages.
760        """
761        action = functools.partial(
762            self.service.instances().stop, project=self._project, zone=zone)
763        return self._BatchExecuteOnInstances(instances, zone, action)
764
765    def SetScheduling(self,
766                      instance,
767                      zone,
768                      automatic_restart=True,
769                      on_host_maintenance="MIGRATE"):
770        """Update scheduling config |automatic_restart| and |on_host_maintenance|.
771
772        Args:
773            instance: A string, representing instance name.
774            zone: A string, representing zone name. e.g. "us-central1-f".
775            automatic_restart: Boolean, determine whether the instance will
776                               automatically restart if it crashes or not,
777                               default to True.
778            on_host_maintenance: enum["MIGRATE", "TERMINATE"]
779                                 The instance's maintenance behavior, which
780                                 determines whether the instance is live
781                                 "MIGRATE" or "TERMINATE" when there is
782                                 a maintenance event.
783
784        Raises:
785            errors.GceOperationTimeoutError: Operation takes too long to finish.
786        """
787        body = {
788            "automaticRestart": automatic_restart,
789            "onHostMaintenance": on_host_maintenance
790        }
791        api = self.service.instances().setScheduling(
792            project=self._project, zone=zone, instance=instance, body=body)
793        operation = self.Execute(api)
794        try:
795            self.WaitOnOperation(
796                operation=operation,
797                operation_scope=OperationScope.ZONE,
798                scope_name=zone)
799        except errors.GceOperationTimeoutError:
800            logger.error("Set instance scheduling failed: %s", instance)
801            raise
802        logger.info(
803            "Instance scheduling changed:\n"
804            "    automaticRestart: %s\n"
805            "    onHostMaintenance: %s\n",
806            str(automatic_restart).lower(), on_host_maintenance)
807
808    def ListInstances(self, instance_filter=None):
809        """List instances cross all zones.
810
811        Gcompute response instance. For example:
812        {
813            'items':
814            {
815                'zones/europe-west3-b':
816                {
817                    'warning':
818                    {
819                        'message': "There are no results for scope
820                        'zones/europe-west3-b' on this page.",
821                        'code': 'NO_RESULTS_ON_PAGE',
822                        'data': [{'value': u'zones/europe-west3-b',
823                                  'key': u'scope'}]
824                    }
825                },
826                'zones/asia-east1-b':
827                {
828                    'instances': [
829                    {
830                        'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone'
831                        'status': 'RUNNING',
832                        'cpuPlatform': 'Intel Broadwell',
833                        'startRestricted': False,
834                        'labels': {u'created_by': u'herbertxue'},
835                        'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone',
836                        ...
837                    }]
838                }
839            }
840        }
841
842        Args:
843            instance_filter: A string representing a filter in format of
844                             FIELD_NAME COMPARISON_STRING LITERAL_STRING
845                             e.g. "name ne example-instance"
846                             e.g. "name eq "example-instance-[0-9]+""
847
848        Returns:
849            A list of instances.
850        """
851        # aggregatedList will only return 500 results max, so if there are more,
852        # we need to send in the next page token to get the next 500 (and so on
853        # and so forth.
854        get_more_instances = True
855        page_token = None
856        instances_list = []
857        while get_more_instances:
858            api = self.service.instances().aggregatedList(
859                project=self._project,
860                filter=instance_filter,
861                pageToken=page_token)
862            response = self.Execute(api)
863            page_token = response.get("nextPageToken")
864            get_more_instances = page_token is not None
865            for instances_data in response["items"].values():
866                if "instances" in instances_data:
867                    for instance in instances_data.get("instances"):
868                        instances_list.append(instance)
869
870        return instances_list
871
872    def SetSchedulingInstances(self,
873                               instances,
874                               zone,
875                               automatic_restart=True,
876                               on_host_maintenance="MIGRATE"):
877        """Update scheduling config |automatic_restart| and |on_host_maintenance|.
878
879        See //cloud/cluster/api/mixer_instances.proto Scheduling for config option.
880
881        Args:
882            instances: A list of string, representing instance names.
883            zone: A string, representing zone name. e.g. "us-central1-f".
884            automatic_restart: Boolean, determine whether the instance will
885                               automatically restart if it crashes or not,
886                               default to True.
887            on_host_maintenance: enum["MIGRATE", "TERMINATE"]
888                                 The instance's maintenance behavior, which
889                                 determines whether the instance is live
890                                 migrated or terminated when there is
891                                 a maintenance event.
892
893        Returns:
894            A tuple, (done, failed, error_msgs)
895            done: A list of string, representing the names of instances that
896                  have been executed.
897            failed: A list of string, representing the names of instances that
898                    we failed to execute.
899            error_msgs: A list of string, representing the failure messages.
900        """
901        body = {
902            "automaticRestart": automatic_restart,
903            "OnHostMaintenance": on_host_maintenance
904        }
905        action = functools.partial(
906            self.service.instances().setScheduling,
907            project=self._project,
908            zone=zone,
909            body=body)
910        return self._BatchExecuteOnInstances(instances, zone, action)
911
912    def _BatchExecuteOnInstances(self, instances, zone, action):
913        """Batch processing operations requiring computing time.
914
915        Args:
916            instances: A list of instance names.
917            zone: A string, e.g. "us-central1-f".
918            action: partial func, all kwargs for this gcloud action has been
919                    defined in the caller function (e.g. See "StartInstances")
920                    except 'instance' which will be defined by iterating the
921                    |instances|.
922
923        Returns:
924            A tuple, (done, failed, error_msgs)
925            done: A list of string, representing the names of instances that
926                  have been executed.
927            failed: A list of string, representing the names of instances that
928                    we failed to execute.
929            error_msgs: A list of string, representing the failure messages.
930        """
931        if not instances:
932            return [], [], []
933        # Batch send requests.
934        logger.info("Batch executing instances: %s", instances)
935        requests = {}
936        for instance_name in set(instances):
937            requests[instance_name] = action(instance=instance_name)
938        return self._BatchExecuteAndWait(
939            requests, operation_scope=OperationScope.ZONE, scope_name=zone)
940
941    def _BatchExecuteAndWait(self, requests, operation_scope, scope_name=None):
942        """Batch processing requests and wait on the operation.
943
944        Args:
945            requests: A dictionary. The key is a string representing the resource
946                      name. For example, an instance name, or an image name.
947            operation_scope: A value from OperationScope, "zone", "region",
948                             or "global".
949            scope_name: If operation_scope is "zone" or "region", this should be
950                        the name of the zone or region, e.g. "us-central1-f".
951        Returns:
952            A tuple, (done, failed, error_msgs)
953            done: A list of string, representing the resource names that have
954                  been executed.
955            failed: A list of string, representing resource names that
956                    we failed to execute.
957            error_msgs: A list of string, representing the failure messages.
958        """
959        results = self.BatchExecute(requests)
960        # Initialize return values
961        failed = []
962        error_msgs = []
963        for resource_name, (_, error) in results.iteritems():
964            if error is not None:
965                failed.append(resource_name)
966                error_msgs.append(str(error))
967        done = []
968        # Wait for the executing operations to finish.
969        logger.info("Waiting for executing operations")
970        for resource_name in six.iterkeys(requests):
971            operation, _ = results[resource_name]
972            if operation:
973                try:
974                    self.WaitOnOperation(operation, operation_scope,
975                                         scope_name)
976                    done.append(resource_name)
977                except errors.DriverError as exc:
978                    failed.append(resource_name)
979                    error_msgs.append(str(exc))
980        return done, failed, error_msgs
981
982    def ListZones(self):
983        """List all zone instances in the project.
984
985        Returns:
986            Gcompute response instance. For example:
987            {
988              "id": "projects/google.com%3Aandroid-build-staging/zones",
989              "kind": "compute#zoneList",
990              "selfLink": "https://www.googleapis.com/compute/v1/projects/"
991                  "google.com:android-build-staging/zones"
992              "items": [
993                {
994                  'creationTimestamp': '2014-07-15T10:44:08.663-07:00',
995                  'description': 'asia-east1-c',
996                  'id': '2222',
997                  'kind': 'compute#zone',
998                  'name': 'asia-east1-c',
999                  'region': 'https://www.googleapis.com/compute/v1/projects/'
1000                      'google.com:android-build-staging/regions/asia-east1',
1001                  'selfLink': 'https://www.googleapis.com/compute/v1/projects/'
1002                      'google.com:android-build-staging/zones/asia-east1-c',
1003                  'status': 'UP'
1004                }, {
1005                  'creationTimestamp': '2014-05-30T18:35:16.575-07:00',
1006                  'description': 'asia-east1-b',
1007                  'id': '2221',
1008                  'kind': 'compute#zone',
1009                  'name': 'asia-east1-b',
1010                  'region': 'https://www.googleapis.com/compute/v1/projects/'
1011                      'google.com:android-build-staging/regions/asia-east1',
1012                  'selfLink': 'https://www.googleapis.com/compute/v1/projects'
1013                      '/google.com:android-build-staging/zones/asia-east1-b',
1014                  'status': 'UP'
1015                }]
1016            }
1017            See cloud cluster's api/mixer_zones.proto
1018        """
1019        api = self.service.zones().list(project=self._project)
1020        return self.Execute(api)
1021
1022    def ListRegions(self):
1023        """List all the regions for a project.
1024
1025        Returns:
1026            A dictionary containing all the zones and additional data. See this link
1027            for the detailed response:
1028            https://cloud.google.com/compute/docs/reference/latest/regions/list.
1029            Example:
1030            {
1031              'items': [{
1032                  'name':
1033                      'us-central1',
1034                  'quotas': [{
1035                      'usage': 2.0,
1036                      'limit': 24.0,
1037                      'metric': 'CPUS'
1038                  }, {
1039                      'usage': 1.0,
1040                      'limit': 23.0,
1041                      'metric': 'IN_USE_ADDRESSES'
1042                  }, {
1043                      'usage': 209.0,
1044                      'limit': 10240.0,
1045                      'metric': 'DISKS_TOTAL_GB'
1046                  }, {
1047                      'usage': 1000.0,
1048                      'limit': 20000.0,
1049                      'metric': 'INSTANCES'
1050                  }]
1051              },..]
1052            }
1053        """
1054        api = self.service.regions().list(project=self._project)
1055        return self.Execute(api)
1056
1057    def _GetNetworkArgs(self, network, zone):
1058        """Helper to generate network args that is used to create an instance.
1059
1060        Args:
1061            network: A string, e.g. "default".
1062            zone: String, representing zone name, e.g. "us-central1-f"
1063
1064        Returns:
1065            A dictionary representing network args.
1066        """
1067        network_args = {
1068            "network": self.GetNetworkUrl(network),
1069            "accessConfigs": [{
1070                "name": "External NAT",
1071                "type": "ONE_TO_ONE_NAT"
1072            }]
1073        }
1074        # default network can be blank or set to default, we don't need to
1075        # specify the subnetwork for that.
1076        if network and network != "default":
1077            network_args["subnetwork"] = self.GetSubnetworkUrl(network, zone)
1078        return network_args
1079
1080    def _GetDiskArgs(self,
1081                     disk_name,
1082                     image_name,
1083                     image_project=None,
1084                     disk_size_gb=None):
1085        """Helper to generate disk args that is used to create an instance.
1086
1087        Args:
1088            disk_name: A string
1089            image_name: A string
1090            image_project: A string
1091            disk_size_gb: An integer
1092
1093        Returns:
1094            List holding dict of disk args.
1095        """
1096        args = copy.deepcopy(BASE_DISK_ARGS)
1097        args["initializeParams"] = {
1098            "diskName": disk_name,
1099            "sourceImage": self.GetImage(image_name,
1100                                         image_project)["selfLink"],
1101        }
1102        # TODO: Remove this check once it's validated that we can either pass in
1103        # a None diskSizeGb or we find an appropriate default val.
1104        if disk_size_gb:
1105            args["diskSizeGb"] = disk_size_gb
1106        return [args]
1107
1108    def _GetExtraDiskArgs(self, extra_disk_name, zone):
1109        """Get extra disk arg for given disk.
1110
1111        Args:
1112            extra_disk_name: String, name of the disk.
1113            zone: String, representing zone name, e.g. "us-central1-f"
1114
1115        Returns:
1116            A dictionary of disk args.
1117        """
1118        return [{
1119            "type": "PERSISTENT",
1120            "mode": "READ_WRITE",
1121            "source": "projects/%s/zones/%s/disks/%s" % (self._project, zone,
1122                                                         extra_disk_name),
1123            "autoDelete": True,
1124            "boot": False,
1125            "interface": "SCSI",
1126            "deviceName": extra_disk_name,
1127        }]
1128
1129    # pylint: disable=too-many-locals
1130    def CreateInstance(self,
1131                       instance,
1132                       image_name,
1133                       machine_type,
1134                       metadata,
1135                       network,
1136                       zone,
1137                       disk_args=None,
1138                       image_project=None,
1139                       gpu=None,
1140                       extra_disk_name=None,
1141                       extra_scopes=None,
1142                       tags=None):
1143        """Create a gce instance with a gce image.
1144
1145        Args:
1146            instance: String, instance name.
1147            image_name: String, source image used to create this disk.
1148            machine_type: String, representing machine_type,
1149                          e.g. "n1-standard-1"
1150            metadata: Dict, maps a metadata name to its value.
1151            network: String, representing network name, e.g. "default"
1152            zone: String, representing zone name, e.g. "us-central1-f"
1153            disk_args: A list of extra disk args (strings), see _GetDiskArgs
1154                       for example, if None, will create a disk using the given
1155                       image.
1156            image_project: String, name of the project where the image
1157                           belongs. Assume the default project if None.
1158            gpu: String, type of gpu to attach. e.g. "nvidia-tesla-k80", if
1159                 None no gpus will be attached. For more details see:
1160                 https://cloud.google.com/compute/docs/gpus/add-gpus
1161            extra_disk_name: String,the name of the extra disk to attach.
1162            extra_scopes: A list of extra scopes to be provided to the instance.
1163            tags: A list of tags to associate with the instance. e.g.
1164                  ["http-server", "https-server"]
1165        """
1166        disk_args = (disk_args
1167                     or self._GetDiskArgs(instance, image_name, image_project))
1168        if extra_disk_name:
1169            disk_args.extend(self._GetExtraDiskArgs(extra_disk_name, zone))
1170
1171        scopes = []
1172        scopes.extend(self.DEFAULT_INSTANCE_SCOPE)
1173        if extra_scopes:
1174            scopes.extend(extra_scopes)
1175
1176        # Add labels for giving the instances ability to be filter for
1177        # acloud list/delete cmds.
1178        body = {
1179            "machineType": self.GetMachineType(machine_type, zone)["selfLink"],
1180            "name": instance,
1181            "networkInterfaces": [self._GetNetworkArgs(network, zone)],
1182            "disks": disk_args,
1183            "labels": {constants.LABEL_CREATE_BY: getpass.getuser()},
1184            "serviceAccounts": [{
1185                "email": "default",
1186                "scopes": scopes,
1187            }],
1188        }
1189
1190        if tags:
1191            body["tags"] = {"items": tags}
1192        if gpu:
1193            body["guestAccelerators"] = [{
1194                "acceleratorType": self.GetAcceleratorUrl(gpu, zone),
1195                "acceleratorCount": 1
1196            }]
1197            # Instances with GPUs cannot live migrate because they are assigned
1198            # to specific hardware devices.
1199            body["scheduling"] = {"onHostMaintenance": "terminate"}
1200        if metadata:
1201            metadata_list = [{
1202                _METADATA_KEY: key,
1203                _METADATA_KEY_VALUE: val
1204            } for key, val in metadata.iteritems()]
1205            body[_METADATA] = {_ITEMS: metadata_list}
1206        logger.info("Creating instance: project %s, zone %s, body:%s",
1207                    self._project, zone, body)
1208        api = self.service.instances().insert(
1209            project=self._project, zone=zone, body=body)
1210        operation = self.Execute(api)
1211        self.WaitOnOperation(
1212            operation, operation_scope=OperationScope.ZONE, scope_name=zone)
1213        logger.info("Instance %s has been created.", instance)
1214
1215    def DeleteInstance(self, instance, zone):
1216        """Delete a gce instance.
1217
1218        Args:
1219            instance: A string, instance name.
1220            zone: A string, e.g. "us-central1-f"
1221        """
1222        logger.info("Deleting instance: %s", instance)
1223        api = self.service.instances().delete(
1224            project=self._project, zone=zone, instance=instance)
1225        operation = self.Execute(api)
1226        self.WaitOnOperation(
1227            operation, operation_scope=OperationScope.ZONE, scope_name=zone)
1228        logger.info("Deleted instance: %s", instance)
1229
1230    def DeleteInstances(self, instances, zone):
1231        """Delete multiple instances.
1232
1233        Args:
1234            instances: A list of instance names.
1235            zone: A string, e.g. "us-central1-f".
1236
1237        Returns:
1238            A tuple, (deleted, failed, error_msgs)
1239            deleted: A list of names of instances that have been deleted.
1240            failed: A list of names of instances that we fail to delete.
1241            error_msgs: A list of failure messages.
1242        """
1243        action = functools.partial(
1244            self.service.instances().delete, project=self._project, zone=zone)
1245        return self._BatchExecuteOnInstances(instances, zone, action)
1246
1247    def ResetInstance(self, instance, zone):
1248        """Reset the gce instance.
1249
1250        Args:
1251            instance: A string, instance name.
1252            zone: A string, e.g. "us-central1-f".
1253        """
1254        logger.info("Resetting instance: %s", instance)
1255        api = self.service.instances().reset(
1256            project=self._project, zone=zone, instance=instance)
1257        operation = self.Execute(api)
1258        self.WaitOnOperation(
1259            operation, operation_scope=OperationScope.ZONE, scope_name=zone)
1260        logger.info("Instance has been reset: %s", instance)
1261
1262    def GetMachineType(self, machine_type, zone):
1263        """Get URL for a given machine typle.
1264
1265        Args:
1266            machine_type: A string, name of the machine type.
1267            zone: A string, e.g. "us-central1-f"
1268
1269        Returns:
1270            A machine type resource in json.
1271            https://cloud.google.com/compute/docs/reference/latest/
1272            machineTypes#resource
1273        """
1274        api = self.service.machineTypes().get(
1275            project=self._project, zone=zone, machineType=machine_type)
1276        return self.Execute(api)
1277
1278    def GetAcceleratorUrl(self, accelerator_type, zone):
1279        """Get URL for a given type of accelator.
1280
1281        Args:
1282            accelerator_type: A string, representing the accelerator, e.g
1283              "nvidia-tesla-k80"
1284            zone: A string representing a zone, e.g. "us-west1-b"
1285
1286        Returns:
1287            A URL that points to the accelerator resource, e.g.
1288            https://www.googleapis.com/compute/v1/projects/<project id>/zones/
1289            us-west1-b/acceleratorTypes/nvidia-tesla-k80
1290        """
1291        api = self.service.acceleratorTypes().get(
1292            project=self._project, zone=zone, acceleratorType=accelerator_type)
1293        result = self.Execute(api)
1294        return result["selfLink"]
1295
1296    def GetNetworkUrl(self, network):
1297        """Get URL for a given network.
1298
1299        Args:
1300            network: A string, representing network name, e.g "default"
1301
1302        Returns:
1303            A URL that points to the network resource, e.g.
1304            https://www.googleapis.com/compute/v1/projects/<project id>/
1305            global/networks/default
1306        """
1307        api = self.service.networks().get(
1308            project=self._project, network=network)
1309        result = self.Execute(api)
1310        return result["selfLink"]
1311
1312    def GetSubnetworkUrl(self, network, zone):
1313        """Get URL for a given network and zone.
1314
1315        Return the subnetwork for the network in the specified region that the
1316        specified zone resides in. If there is no subnetwork for the specified
1317        zone, raise an exception.
1318
1319        Args:
1320            network: A string, representing network name, e.g "default"
1321            zone: String, representing zone name, e.g. "us-central1-f"
1322
1323        Returns:
1324            A URL that points to the network resource, e.g.
1325            https://www.googleapis.com/compute/v1/projects/<project id>/
1326            global/networks/default
1327
1328        Raises:
1329            errors.NoSubnetwork: When no subnetwork exists for the zone
1330            specified.
1331        """
1332        api = self.service.networks().get(
1333            project=self._project, network=network)
1334        result = self.Execute(api)
1335        region = zone.rsplit("-", 1)[0]
1336        for subnetwork in result.get("subnetworks", []):
1337            if region in subnetwork:
1338                return subnetwork
1339        raise errors.NoSubnetwork("No subnetwork for network %s in region %s" %
1340                                  (network, region))
1341
1342    def CompareMachineSize(self, machine_type_1, machine_type_2, zone):
1343        """Compare the size of two machine types.
1344
1345        Args:
1346            machine_type_1: A string representing a machine type, e.g. n1-standard-1
1347            machine_type_2: A string representing a machine type, e.g. n1-standard-1
1348            zone: A string representing a zone, e.g. "us-central1-f"
1349
1350        Returns:
1351            -1 if any metric of machine size of the first type is smaller than
1352                the second type.
1353            0 if all metrics of machine size are equal.
1354            1 if at least one metric of machine size of the first type is
1355                greater than the second type and all metrics of first type are
1356                greater or equal to the second type.
1357
1358        Raises:
1359            errors.DriverError: For malformed response.
1360        """
1361        machine_info_1 = self.GetMachineType(machine_type_1, zone)
1362        machine_info_2 = self.GetMachineType(machine_type_2, zone)
1363        result = 0
1364        for metric in self.MACHINE_SIZE_METRICS:
1365            if metric not in machine_info_1 or metric not in machine_info_2:
1366                raise errors.DriverError(
1367                    "Malformed machine size record: Can't find '%s' in %s or %s"
1368                    % (metric, machine_info_1, machine_info_2))
1369            cmp_result = machine_info_1[metric] - machine_info_2[metric]
1370            if cmp_result < 0:
1371                return -1
1372            if cmp_result > 0:
1373                result = 1
1374        return result
1375
1376    def GetSerialPortOutput(self, instance, zone, port=1):
1377        """Get serial port output.
1378
1379        Args:
1380            instance: string, instance name.
1381            zone: string, zone name.
1382            port: int, which COM port to read from, 1-4, default to 1.
1383
1384        Returns:
1385            String, contents of the output.
1386
1387        Raises:
1388            errors.DriverError: For malformed response.
1389        """
1390        api = self.service.instances().getSerialPortOutput(
1391            project=self._project, zone=zone, instance=instance, port=port)
1392        result = self.Execute(api)
1393        if "contents" not in result:
1394            raise errors.DriverError(
1395                "Malformed response for GetSerialPortOutput: %s" % result)
1396        return result["contents"]
1397
1398    def GetInstanceNamesByIPs(self, ips):
1399        """Get Instance names by IPs.
1400
1401        This function will go through all instances, which
1402        could be slow if there are too many instances.  However, currently
1403        GCE doesn't support search for instance by IP.
1404
1405        Args:
1406            ips: A set of IPs.
1407
1408        Returns:
1409            A dictionary where key is IP and value is instance name or None
1410            if instance is not found for the given IP.
1411        """
1412        ip_name_map = dict.fromkeys(ips)
1413        for instance in self.ListInstances():
1414            try:
1415                ip = instance["networkInterfaces"][0]["accessConfigs"][0][
1416                    "natIP"]
1417                if ip in ips:
1418                    ip_name_map[ip] = instance["name"]
1419            except (IndexError, KeyError) as e:
1420                logger.error("Could not get instance names by ips: %s", str(e))
1421        return ip_name_map
1422
1423    def GetInstanceIP(self, instance, zone):
1424        """Get Instance IP given instance name.
1425
1426        Args:
1427            instance: String, representing instance name.
1428            zone: String, name of the zone.
1429
1430        Returns:
1431            ssh.IP object, that stores internal and external ip of the instance.
1432        """
1433        instance = self.GetInstance(instance, zone)
1434        internal_ip = instance["networkInterfaces"][0]["networkIP"]
1435        external_ip = instance["networkInterfaces"][0]["accessConfigs"][0]["natIP"]
1436        return IP(internal=internal_ip, external=external_ip)
1437
1438    @utils.TimeExecute(function_description="Updating instance metadata: ")
1439    def SetInstanceMetadata(self, zone, instance, body):
1440        """Set instance metadata.
1441
1442        Args:
1443            zone: String, name of zone.
1444            instance: String, representing instance name.
1445            body: Dict, Metadata body.
1446                  metdata is in the following format.
1447                  {
1448                    "kind": "compute#metadata",
1449                    "fingerprint": "a-23icsyx4E=",
1450                    "items": [
1451                      {
1452                        "key": "sshKeys",
1453                        "value": "key"
1454                      }, ...
1455                    ]
1456                  }
1457        """
1458        api = self.service.instances().setMetadata(
1459            project=self._project, zone=zone, instance=instance, body=body)
1460        operation = self.Execute(api)
1461        self.WaitOnOperation(
1462            operation, operation_scope=OperationScope.ZONE, scope_name=zone)
1463
1464    def AddSshRsaInstanceMetadata(self, user, ssh_rsa_path, instance):
1465        """Add the public rsa key to the instance's metadata.
1466
1467        Confirm that the instance has this public key in the instance's
1468        metadata, if not we will add this public key.
1469
1470        Args:
1471            user: String, name of the user which the key belongs to.
1472            ssh_rsa_path: String, The absolute path to public rsa key.
1473            instance: String, representing instance name.
1474        """
1475        ssh_rsa_path = os.path.expanduser(ssh_rsa_path)
1476        rsa = GetRsaKey(ssh_rsa_path)
1477        entry = "%s:%s" % (user, rsa)
1478        logger.debug("New RSA entry: %s", entry)
1479
1480        zone = self.GetZoneByInstance(instance)
1481        gce_instance = self.GetInstance(instance, zone)
1482        metadata = gce_instance.get(_METADATA)
1483        if RsaNotInMetadata(metadata, entry):
1484            self.UpdateRsaInMetadata(zone, instance, metadata, entry)
1485
1486    def GetZoneByInstance(self, instance):
1487        """Get the zone from instance name.
1488
1489        Gcompute response instance. For example:
1490        {
1491            'items':
1492            {
1493                'zones/europe-west3-b':
1494                {
1495                    'warning':
1496                    {
1497                        'message': "There are no results for scope
1498                        'zones/europe-west3-b' on this page.",
1499                        'code': 'NO_RESULTS_ON_PAGE',
1500                        'data': [{'value': u'zones/europe-west3-b',
1501                                  'key': u'scope'}]
1502                    }
1503                },
1504                'zones/asia-east1-b':
1505                {
1506                    'instances': [
1507                    {
1508                        'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone'
1509                        'status': 'RUNNING',
1510                        'cpuPlatform': 'Intel Broadwell',
1511                        'startRestricted': False,
1512                        'labels': {u'created_by': u'herbertxue'},
1513                        'name': 'ins-bc212dc8-userbuild-aosp-cf-x86-64-phone',
1514                        ...
1515                    }]
1516                }
1517            }
1518        }
1519
1520        Args:
1521            instance: String, representing instance name.
1522
1523        Raises:
1524            errors.GetGceZoneError: Can't get zone from instance name.
1525
1526        Returns:
1527            String of zone name.
1528        """
1529        api = self.service.instances().aggregatedList(
1530            project=self._project,
1531            filter="name=%s" % instance)
1532        response = self.Execute(api)
1533        for zone, instance_data in response["items"].items():
1534            if "instances" in instance_data:
1535                zone_match = _ZONE_RE.match(zone)
1536                if zone_match:
1537                    return zone_match.group("zone")
1538        raise errors.GetGceZoneError("Can't get zone from the instance name %s"
1539                                     % instance)
1540
1541    def GetZonesByInstances(self, instances):
1542        """Get the zone from instance name.
1543
1544        Args:
1545            instances: List of strings, representing instance names.
1546
1547        Returns:
1548            A dictionary that contains the name of all instances in the zone.
1549            The key is the name of the zone, and the value is a list contains
1550            the name of the instances.
1551        """
1552        zone_instances = {}
1553        for instance in instances:
1554            zone = self.GetZoneByInstance(instance)
1555            if zone in zone_instances:
1556                zone_instances[zone].append(instance)
1557            else:
1558                zone_instances[zone] = [instance]
1559        return zone_instances
1560
1561    def CheckAccess(self):
1562        """Check if the user has read access to the cloud project.
1563
1564        Returns:
1565            True if the user has at least read access to the project.
1566            False otherwise.
1567
1568        Raises:
1569            errors.HttpError if other unexpected error happens when
1570            accessing the project.
1571        """
1572        api = self.service.zones().list(project=self._project)
1573        retry_http_codes = copy.copy(self.RETRY_HTTP_CODES)
1574        retry_http_codes.remove(self.ACCESS_DENIED_CODE)
1575        try:
1576            self.Execute(api, retry_http_codes=retry_http_codes)
1577        except errors.HttpError as e:
1578            if e.code == self.ACCESS_DENIED_CODE:
1579                return False
1580            raise
1581        return True
1582
1583    def UpdateRsaInMetadata(self, zone, instance, metadata, entry):
1584        """Update ssh public key to sshKeys's value in this metadata.
1585
1586        Args:
1587            zone: String, name of zone.
1588            instance: String, representing instance name.
1589            metadata: Dict, maps a metadata name to its value.
1590            entry: String, ssh public key.
1591        """
1592        ssh_key_item = GetSshKeyFromMetadata(metadata)
1593        if ssh_key_item:
1594            # The ssh key exists in the metadata so update the reference to it
1595            # in the metadata. There may not be an actual ssh key value so
1596            # that's why we filter for None to avoid an empty line in front.
1597            ssh_key_item[_METADATA_KEY_VALUE] = "\n".join(
1598                list(filter(None, [ssh_key_item[_METADATA_KEY_VALUE], entry])))
1599        else:
1600            # Since there is no ssh key item in the metadata, we need to add it in.
1601            ssh_key_item = {_METADATA_KEY: _SSH_KEYS_NAME,
1602                            _METADATA_KEY_VALUE: entry}
1603            metadata[_ITEMS].append(ssh_key_item)
1604        utils.PrintColorString(
1605            "Ssh public key doesn't exist in the instance(%s), adding it."
1606            % instance, utils.TextColors.WARNING)
1607        self.SetInstanceMetadata(zone, instance, metadata)
1608
1609
def RsaNotInMetadata(metadata, entry):
    """Check whether the ssh public key is missing from the sshKeys value.

    The entry counts as present when it occurs as a substring of the value of
    any sshKeys item. Items without a "key" field are skipped, and an sshKeys
    item whose value is missing or None is treated as holding no keys.

    Note: an empty "items" list is added to the metadata if it has none.

    Args:
        metadata: Dict, the instance metadata. Its "items" list holds
                  {"key": ..., "value": ...} dicts.
        entry: String, ssh public key.

    Returns:
        Boolean. True if ssh public key doesn't exist in metadata.
    """
    for item in metadata.setdefault(_ITEMS, []):
        if item.get(_METADATA_KEY) != _SSH_KEYS_NAME:
            continue
        # A missing or None value must not raise; it simply contains no keys.
        existing_keys = item.get(_METADATA_KEY_VALUE) or ""
        if entry in existing_keys:
            return False
    return True
1625
1626
def GetSshKeyFromMetadata(metadata):
    """Find the sshKeys item in the metadata.

    Note: an empty "items" list is added to the metadata if it has none.

    Args:
        metadata: Dict, the instance metadata. Its "items" list holds
                  dicts that carry the metadata name under "key".

    Returns:
        The first item dict whose key is sshKeys, or None if the metadata
        has no such item.
    """
    metadata_items = metadata.setdefault(_ITEMS, [])
    matching_items = (
        candidate for candidate in metadata_items
        if candidate.get(_METADATA_KEY, '') == _SSH_KEYS_NAME)
    return next(matching_items, None)
1641
1642
def GetRsaKey(ssh_rsa_path):
    """Read and verify the public rsa key stored at the given path.

    Args:
        ssh_rsa_path: String, path to the public rsa key file. A leading "~"
                      is expanded to the user's home directory.

    Returns:
        String, content of the key file with surrounding whitespace removed.

    Raises:
        errors.DriverError: RSA file does not exist.
    """
    key_path = os.path.expanduser(ssh_rsa_path)
    if not os.path.exists(key_path):
        raise errors.DriverError(
            "RSA file %s does not exist." % key_path)

    with open(key_path) as key_file:
        key_text = key_file.read()
    # Only strip when something was read; a falsy value is passed on as is.
    if key_text:
        key_text = key_text.strip()
    utils.VerifyRsaPubKey(key_text)
    return key_text
1667