1# Copyright 2019 - The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""A client that manages Cuttlefish Virtual Device on compute engine.
15
16** CvdComputeClient **
17
CvdComputeClient derives from AndroidComputeClient. It manages a Google
Compute Engine project that is set up for running Cuttlefish Virtual Devices.
20It knows how to create a host instance from Cuttlefish Stable Host Image, fetch
21Android build, and start Android within the host instance.
22
23** Class hierarchy **
24
25  base_cloud_client.BaseCloudApiClient
26                ^
27                |
28       gcompute_client.ComputeClient
29                ^
30                |
31       android_compute_client.AndroidComputeClient
32                ^
33                |
34       cvd_compute_client_multi_stage.CvdComputeClient
35
36"""
37
import logging
import os
import shutil
import stat
import subprocess
import tempfile
import time

from acloud import errors
from acloud.internal import constants
from acloud.internal.lib import android_build_client
from acloud.internal.lib import android_compute_client
from acloud.internal.lib import gcompute_client
from acloud.internal.lib import utils
from acloud.internal.lib.ssh import Ssh
from acloud.pull import pull
53
54
55logger = logging.getLogger(__name__)
56
57_DECOMPRESS_KERNEL_ARG = "-decompress_kernel=true"
58_GPU_ARG = "-gpu_mode=drm_virgl"
59_DEFAULT_BRANCH = "aosp-master"
60_FETCHER_BUILD_TARGET = "aosp_cf_x86_phone-userdebug"
61_FETCHER_NAME = "fetch_cvd"
62# Time info to write in report.
63_FETCH_ARTIFACT = "fetch_artifact_time"
64_GCE_CREATE = "gce_create_time"
65_LAUNCH_CVD = "launch_cvd_time"
66# WebRTC args for launching AVD
67_GUEST_ENFORCE_SECURITY_FALSE = "--guest_enforce_security=false"
68_START_WEBRTC = "--start_webrtc"
69_VM_MANAGER = "--vm_manager=crosvm"
70_WEBRTC_ARGS = [_GUEST_ENFORCE_SECURITY_FALSE, _START_WEBRTC, _VM_MANAGER]
71_WEBRTC_PUBLIC_IP = "--webrtc_public_ip=%s"
72
73
74def _ProcessBuild(build_id=None, branch=None, build_target=None):
75    """Create a Cuttlefish fetch_cvd build string.
76
77    Args:
78        build_id: A specific build number to load from. Takes precedence over `branch`.
79        branch: A manifest-branch at which to get the latest build.
80        build_target: A particular device to load at the desired build.
81
82    Returns:
83        A string, used in the fetch_cvd cmd or None if all args are None.
84    """
85    if not build_target:
86        return build_id or branch
87    elif build_target and not branch:
88        branch = _DEFAULT_BRANCH
89    return (build_id or branch) + "/" + build_target
90
91
92class CvdComputeClient(android_compute_client.AndroidComputeClient):
93    """Client that manages Android Virtual Device."""
94
95    DATA_POLICY_CREATE_IF_MISSING = "create_if_missing"
96
97    def __init__(self,
98                 acloud_config,
99                 oauth2_credentials,
100                 boot_timeout_secs=None,
101                 ins_timeout_secs=None,
102                 report_internal_ip=None,
103                 gpu=None):
104        """Initialize.
105
106        Args:
107            acloud_config: An AcloudConfig object.
108            oauth2_credentials: An oauth2client.OAuth2Credentials instance.
109            boot_timeout_secs: Integer, the maximum time to wait for the AVD
110                               to boot up.
111            ins_timeout_secs: Integer, the maximum time to wait for the
112                              instance ready.
113            report_internal_ip: Boolean to report the internal ip instead of
114                                external ip.
115            gpu: String, GPU to attach to the device.
116        """
117        super(CvdComputeClient, self).__init__(acloud_config, oauth2_credentials)
118
119        self._fetch_cvd_version = acloud_config.fetch_cvd_version
120        self._build_api = (
121            android_build_client.AndroidBuildClient(oauth2_credentials))
122        self._ssh_private_key_path = acloud_config.ssh_private_key_path
123        self._boot_timeout_secs = boot_timeout_secs
124        self._ins_timeout_secs = ins_timeout_secs
125        self._report_internal_ip = report_internal_ip
126        self._gpu = gpu
127        # Store all failures result when creating one or multiple instances.
128        self._all_failures = dict()
129        self._extra_args_ssh_tunnel = acloud_config.extra_args_ssh_tunnel
130        self._ssh = None
131        self._ip = None
132        self._user = constants.GCE_USER
133        self._execution_time = {_FETCH_ARTIFACT: 0, _GCE_CREATE: 0, _LAUNCH_CVD: 0}
134
135    def InitRemoteHost(self, ssh, ip, user):
136        """Init remote host.
137
138        Check if we can ssh to the remote host, stop any cf instances running
139        on it, and remove existing files.
140
141        Args:
142            ssh: Ssh object.
143            ip: namedtuple (internal, external) that holds IP address of the
144                remote host, e.g. "external:140.110.20.1, internal:10.0.0.1"
145            user: String of user log in to the instance.
146        """
147        self._ssh = ssh
148        self._ip = ip
149        self._user = user
150        self._ssh.WaitForSsh(timeout=self._ins_timeout_secs)
151        self.StopCvd()
152        self.CleanUp()
153
154    # pylint: disable=arguments-differ,too-many-locals,broad-except
155    def CreateInstance(self, instance, image_name, image_project,
156                       build_target=None, branch=None, build_id=None,
157                       kernel_branch=None, kernel_build_id=None,
158                       kernel_build_target=None, blank_data_disk_size_gb=None,
159                       avd_spec=None, extra_scopes=None,
160                       system_build_target=None, system_branch=None,
161                       system_build_id=None):
162
163        """Create/Reuse a single configured cuttlefish device.
164        1. Prepare GCE instance.
165           Create a new instnace or get IP address for reusing the specific instance.
166        2. Put fetch_cvd on the instance.
167        3. Invoke fetch_cvd to fetch and run the instance.
168
169        Args:
170            instance: instance name.
171            image_name: A string, the name of the GCE image.
172            image_project: A string, name of the project where the image lives.
173                           Assume the default project if None.
174            build_target: Target name, e.g. "aosp_cf_x86_phone-userdebug"
175            branch: Branch name, e.g. "aosp-master"
176            build_id: Build id, a string, e.g. "2263051", "P2804227"
177            kernel_branch: Kernel branch name, e.g. "kernel-common-android-4.14"
178            kernel_build_id: Kernel build id, a string, e.g. "223051", "P280427"
179            kernel_build_target: String, Kernel build target name.
180            blank_data_disk_size_gb: Size of the blank data disk in GB.
181            avd_spec: An AVDSpec instance.
182            extra_scopes: A list of extra scopes to be passed to the instance.
183            system_build_target: Target name for the system image,
184                                e.g. "cf_x86_phone-userdebug"
185            system_branch: A String, branch name for the system image.
186            system_build_id: A string, build id for the system image.
187
188        Returns:
189            A string, representing instance name.
190        """
191
192        # A blank data disk would be created on the host. Make sure the size of
193        # the boot disk is large enough to hold it.
194        boot_disk_size_gb = (
195            int(self.GetImage(image_name, image_project)["diskSizeGb"]) +
196            blank_data_disk_size_gb)
197
198        if avd_spec and avd_spec.instance_name_to_reuse:
199            self._ip = self._ReusingGceInstance(avd_spec)
200        else:
201            self._ip = self._CreateGceInstance(instance, image_name, image_project,
202                                               extra_scopes, boot_disk_size_gb,
203                                               avd_spec)
204        self._ssh = Ssh(ip=self._ip,
205                        user=constants.GCE_USER,
206                        ssh_private_key_path=self._ssh_private_key_path,
207                        extra_args_ssh_tunnel=self._extra_args_ssh_tunnel,
208                        report_internal_ip=self._report_internal_ip)
209        try:
210            self._ssh.WaitForSsh(timeout=self._ins_timeout_secs)
211            if avd_spec:
212                if avd_spec.instance_name_to_reuse:
213                    self.StopCvd()
214                    self.CleanUp()
215                return instance
216
217            # TODO: Remove following code after create_cf deprecated.
218            self.UpdateFetchCvd()
219
220            self.FetchBuild(build_id, branch, build_target, system_build_id,
221                            system_branch, system_build_target, kernel_build_id,
222                            kernel_branch, kernel_build_target)
223            kernel_build = self.GetKernelBuild(kernel_build_id,
224                                               kernel_branch,
225                                               kernel_build_target)
226            self.LaunchCvd(instance,
227                           blank_data_disk_size_gb=blank_data_disk_size_gb,
228                           kernel_build=kernel_build,
229                           boot_timeout_secs=self._boot_timeout_secs)
230
231            return instance
232        except Exception as e:
233            self._all_failures[instance] = e
234            return instance
235
236    def _GetLaunchCvdArgs(self, avd_spec=None, blank_data_disk_size_gb=None,
237                          kernel_build=None, decompress_kernel=None):
238        """Get launch_cvd args.
239
240        Args:
241            avd_spec: An AVDSpec instance.
242            blank_data_disk_size_gb: Size of the blank data disk in GB.
243            kernel_build: String, kernel build info.
244            decompress_kernel: Boolean, if true decompress the kernel.
245
246        Returns:
247            String, args of launch_cvd.
248        """
249        launch_cvd_args = []
250        if blank_data_disk_size_gb > 0:
251            # Policy 'create_if_missing' would create a blank userdata disk if
252            # missing. If already exist, reuse the disk.
253            launch_cvd_args.append(
254                "-data_policy=" + self.DATA_POLICY_CREATE_IF_MISSING)
255            launch_cvd_args.append(
256                "-blank_data_image_mb=%d" % (blank_data_disk_size_gb * 1024))
257        if avd_spec:
258            launch_cvd_args.append(
259                "-x_res=" + avd_spec.hw_property[constants.HW_X_RES])
260            launch_cvd_args.append(
261                "-y_res=" + avd_spec.hw_property[constants.HW_Y_RES])
262            launch_cvd_args.append(
263                "-dpi=" + avd_spec.hw_property[constants.HW_ALIAS_DPI])
264            if constants.HW_ALIAS_DISK in avd_spec.hw_property:
265                launch_cvd_args.append(
266                    "-data_policy=" + self.DATA_POLICY_CREATE_IF_MISSING)
267                launch_cvd_args.append(
268                    "-blank_data_image_mb="
269                    + avd_spec.hw_property[constants.HW_ALIAS_DISK])
270            if constants.HW_ALIAS_CPUS in avd_spec.hw_property:
271                launch_cvd_args.append(
272                    "-cpus=%s" % avd_spec.hw_property[constants.HW_ALIAS_CPUS])
273            if constants.HW_ALIAS_MEMORY in avd_spec.hw_property:
274                launch_cvd_args.append(
275                    "-memory_mb=%s" % avd_spec.hw_property[constants.HW_ALIAS_MEMORY])
276            if avd_spec.connect_webrtc:
277                launch_cvd_args.append(_WEBRTC_PUBLIC_IP % self._ip.external)
278                launch_cvd_args.extend(_WEBRTC_ARGS)
279        else:
280            resolution = self._resolution.split("x")
281            launch_cvd_args.append("-x_res=" + resolution[0])
282            launch_cvd_args.append("-y_res=" + resolution[1])
283            launch_cvd_args.append("-dpi=" + resolution[3])
284
285        if kernel_build:
286            launch_cvd_args.append("-kernel_path=kernel")
287
288        if self._launch_args:
289            launch_cvd_args.append(self._launch_args)
290
291        if decompress_kernel:
292            launch_cvd_args.append(_DECOMPRESS_KERNEL_ARG)
293
294        if self._gpu:
295            launch_cvd_args.append(_GPU_ARG)
296
297        return launch_cvd_args
298
299    @staticmethod
300    def GetKernelBuild(kernel_build_id, kernel_branch, kernel_build_target):
301        """Get kernel build args for fetch_cvd.
302
303        Args:
304            kernel_branch: Kernel branch name, e.g. "kernel-common-android-4.14"
305            kernel_build_id: Kernel build id, a string, e.g. "223051", "P280427"
306            kernel_build_target: String, Kernel build target name.
307
308        Returns:
309            String of kernel build args for fetch_cvd.
310            If no kernel build then return None.
311        """
312        # kernel_target have default value "kernel". If user provide kernel_build_id
313        # or kernel_branch, then start to process kernel image.
314        if kernel_build_id or kernel_branch:
315            return _ProcessBuild(kernel_build_id, kernel_branch, kernel_build_target)
316        return None
317
318    def StopCvd(self):
319        """Stop CVD.
320
321        If stop_cvd fails, assume that it's because there was no previously
322        running device.
323        """
324        ssh_command = "./bin/stop_cvd"
325        try:
326            self._ssh.Run(ssh_command)
327        except subprocess.CalledProcessError as e:
328            logger.debug("Failed to stop_cvd (possibly no running device): %s", e)
329
330    def CleanUp(self):
331        """Clean up the files/folders on the existing instance.
332
333        If previous AVD have these files/folders, reusing the instance may have
334        side effects if not cleaned. The path in the instance is /home/vsoc-01/*
335        if the GCE user is vsoc-01.
336        """
337
338        ssh_command = "'/bin/rm -rf /home/%s/*'" % self._user
339        try:
340            self._ssh.Run(ssh_command)
341        except subprocess.CalledProcessError as e:
342            logger.debug("Failed to clean up the files/folders: %s", e)
343
344    @utils.TimeExecute(function_description="Launching AVD(s) and waiting for boot up",
345                       result_evaluator=utils.BootEvaluator)
346    def LaunchCvd(self, instance, avd_spec=None,
347                  blank_data_disk_size_gb=None, kernel_build=None,
348                  decompress_kernel=None,
349                  boot_timeout_secs=None):
350        """Launch CVD.
351
352        Launch AVD with launch_cvd. If the process is failed, acloud would show
353        error messages and auto download log files from remote instance.
354
355        Args:
356            instance: String, instance name.
357            avd_spec: An AVDSpec instance.
358            blank_data_disk_size_gb: Size of the blank data disk in GB.
359            kernel_build: String, kernel build info.
360            decompress_kernel: Boolean, if true decompress the kernel.
361            boot_timeout_secs: Integer, the maximum time to wait for the
362                               command to respond.
363
364        Returns:
365           dict of faliures, return this dict for BootEvaluator to handle
366           LaunchCvd success or fail messages.
367        """
368        timestart = time.time()
369        error_msg = ""
370        launch_cvd_args = self._GetLaunchCvdArgs(avd_spec,
371                                                 blank_data_disk_size_gb,
372                                                 kernel_build,
373                                                 decompress_kernel)
374        boot_timeout_secs = boot_timeout_secs or self.BOOT_TIMEOUT_SECS
375        ssh_command = "./bin/launch_cvd -daemon " + " ".join(launch_cvd_args)
376        try:
377            self._ssh.Run(ssh_command, boot_timeout_secs)
378        except (subprocess.CalledProcessError, errors.DeviceConnectionError) as e:
379            # TODO(b/140475060): Distinguish the error is command return error
380            # or timeout error.
381            error_msg = ("Device %s did not finish on boot within timeout (%s secs)"
382                         % (instance, boot_timeout_secs))
383            self._all_failures[instance] = error_msg
384            utils.PrintColorString(str(e), utils.TextColors.FAIL)
385            if avd_spec and not avd_spec.no_pull_log:
386                self._PullAllLogFiles(instance)
387
388        self._execution_time[_LAUNCH_CVD] = round(time.time() - timestart, 2)
389        return {instance: error_msg} if error_msg else {}
390
391    def _PullAllLogFiles(self, instance):
392        """Pull all log files from instance.
393
394        1. Download log files to temp folder.
395        2. Show messages about the download folder for users.
396
397        Args:
398            instance: String, instance name.
399        """
400        log_files = pull.GetAllLogFilePaths(self._ssh)
401        download_folder = pull.GetDownloadLogFolder(instance)
402        pull.PullLogs(self._ssh, log_files, download_folder)
403
404    @utils.TimeExecute(function_description="Reusing GCE instance")
405    def _ReusingGceInstance(self, avd_spec):
406        """Reusing a cuttlefish existing instance.
407
408        Args:
409            avd_spec: An AVDSpec instance.
410
411        Returns:
412            ssh.IP object, that stores internal and external ip of the instance.
413        """
414        gcompute_client.ComputeClient.AddSshRsaInstanceMetadata(
415            self, constants.GCE_USER, avd_spec.cfg.ssh_public_key_path,
416            avd_spec.instance_name_to_reuse)
417        ip = gcompute_client.ComputeClient.GetInstanceIP(
418            self, instance=avd_spec.instance_name_to_reuse, zone=self._zone)
419
420        return ip
421
422    @utils.TimeExecute(function_description="Creating GCE instance")
423    def _CreateGceInstance(self, instance, image_name, image_project,
424                           extra_scopes, boot_disk_size_gb, avd_spec):
425        """Create a single configured cuttlefish device.
426
427        Override method from parent class.
428        Args:
429            instance: String, instance name.
430            image_name: String, the name of the GCE image.
431            image_project: String, the name of the project where the image.
432            extra_scopes: A list of extra scopes to be passed to the instance.
433            boot_disk_size_gb: Integer, size of the boot disk in GB.
434            avd_spec: An AVDSpec instance.
435
436        Returns:
437            ssh.IP object, that stores internal and external ip of the instance.
438        """
439        timestart = time.time()
440        metadata = self._metadata.copy()
441
442        if avd_spec:
443            metadata[constants.INS_KEY_AVD_TYPE] = avd_spec.avd_type
444            metadata[constants.INS_KEY_AVD_FLAVOR] = avd_spec.flavor
445            metadata[constants.INS_KEY_DISPLAY] = ("%sx%s (%s)" % (
446                avd_spec.hw_property[constants.HW_X_RES],
447                avd_spec.hw_property[constants.HW_Y_RES],
448                avd_spec.hw_property[constants.HW_ALIAS_DPI]))
449
450        disk_args = self._GetDiskArgs(
451            instance, image_name, image_project, boot_disk_size_gb)
452        gcompute_client.ComputeClient.CreateInstance(
453            self,
454            instance=instance,
455            image_name=image_name,
456            image_project=image_project,
457            disk_args=disk_args,
458            metadata=metadata,
459            machine_type=self._machine_type,
460            network=self._network,
461            zone=self._zone,
462            gpu=self._gpu,
463            extra_scopes=extra_scopes,
464            tags=["appstreaming"] if (
465                avd_spec and avd_spec.connect_webrtc) else None)
466        ip = gcompute_client.ComputeClient.GetInstanceIP(
467            self, instance=instance, zone=self._zone)
468        logger.debug("'instance_ip': %s", ip.internal
469                     if self._report_internal_ip else ip.external)
470
471        self._execution_time[_GCE_CREATE] = round(time.time() - timestart, 2)
472        return ip
473
474    @utils.TimeExecute(function_description="Uploading build fetcher to instance")
475    def UpdateFetchCvd(self):
476        """Download fetch_cvd from the Build API, and upload it to a remote instance.
477
478        The version of fetch_cvd to use is retrieved from the configuration file. Once fetch_cvd
479        is on the instance, future commands can use it to download relevant Cuttlefish files from
480        the Build API on the instance itself.
481        """
482        # TODO(schuffelen): Support fetch_cvd_version="latest" when there is
483        # stronger automated testing on it.
484        download_dir = tempfile.mkdtemp()
485        download_target = os.path.join(download_dir, _FETCHER_NAME)
486        self._build_api.DownloadArtifact(
487            build_target=_FETCHER_BUILD_TARGET,
488            build_id=self._fetch_cvd_version,
489            resource_id=_FETCHER_NAME,
490            local_dest=download_target,
491            attempt_id="latest")
492        fetch_cvd_stat = os.stat(download_target)
493        os.chmod(download_target, fetch_cvd_stat.st_mode | stat.S_IEXEC)
494        self._ssh.ScpPushFile(src_file=download_target, dst_file=_FETCHER_NAME)
495        os.remove(download_target)
496        os.rmdir(download_dir)
497
498    @utils.TimeExecute(function_description="Downloading build on instance")
499    def FetchBuild(self, build_id, branch, build_target, system_build_id,
500                   system_branch, system_build_target, kernel_build_id,
501                   kernel_branch, kernel_build_target):
502        """Execute fetch_cvd on the remote instance to get Cuttlefish runtime files.
503
504        Args:
505            fetch_args: String of arguments to pass to fetch_cvd.
506        """
507        timestart = time.time()
508        fetch_cvd_args = ["-credential_source=gce"]
509
510        default_build = _ProcessBuild(build_id, branch, build_target)
511        if default_build:
512            fetch_cvd_args.append("-default_build=" + default_build)
513        system_build = _ProcessBuild(system_build_id, system_branch, system_build_target)
514        if system_build:
515            fetch_cvd_args.append("-system_build=" + system_build)
516        kernel_build = self.GetKernelBuild(kernel_build_id,
517                                           kernel_branch,
518                                           kernel_build_target)
519        if kernel_build:
520            fetch_cvd_args.append("-kernel_build=" + kernel_build)
521
522        self._ssh.Run("./fetch_cvd " + " ".join(fetch_cvd_args))
523        self._execution_time[_FETCH_ARTIFACT] = round(time.time() - timestart, 2)
524
525    def GetInstanceIP(self, instance=None):
526        """Override method from parent class.
527
528        It need to get the IP address in the common_operation. If the class
529        already defind the ip address, return the ip address.
530
531        Args:
532            instance: String, representing instance name.
533
534        Returns:
535            ssh.IP object, that stores internal and external ip of the instance.
536        """
537        if self._ip:
538            return self._ip
539        return gcompute_client.ComputeClient.GetInstanceIP(
540            self, instance=instance, zone=self._zone)
541
542    @property
543    def all_failures(self):
544        """Return all_failures"""
545        return self._all_failures
546
547    @property
548    def execution_time(self):
549        """Return execution_time"""
550        return self._execution_time
551