1#!/usr/bin/env python2
2# Copyright 2019 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""library functions to prepare a DUT for lab deployment.
7
8This library will be shared between Autotest and Skylab DUT deployment tools.
9"""
10
11from __future__ import absolute_import
12from __future__ import division
13from __future__ import print_function
14
15import contextlib
16import time
17
18import common
19import logging
20from autotest_lib.client.common_lib import error
21from autotest_lib.client.common_lib import utils
22from autotest_lib.server import hosts
23from autotest_lib.server import site_utils as server_utils
24from autotest_lib.server.hosts import host_info
25from autotest_lib.server.hosts import servo_host
26from autotest_lib.server.hosts import cros_constants
27from autotest_lib.server.hosts import servo_constants
28
29
# Maximum time, in seconds, to wait for `chromeos-firmwareupdate` to finish.
_FIRMWARE_UPDATE_TIMEOUT = 600
# Check battery level with retries.
# If the battery level is low, sleep for 15 minutes (900 seconds) before
# checking it again.
_BATTERY_LEVEL_CHECK_RETRIES = 8
_BATTERY_LEVEL_CHECK_RETRIES_TIMEOUT = 900
# We expect the battery level to change by at least 4% within 15 minutes;
# a smaller change is treated as the battery not charging properly.
_BATTERY_LEVEL_CHANGE_IN_ONE_RETRY = 4
37
38
@contextlib.contextmanager
def create_cros_host(hostname, board, model, servo_hostname, servo_port,
                servo_serial=None, logs_dir=None):
    """Yield a server.hosts.CrosHost object to use for DUT preparation.

    This object contains just enough inventory data to be able to prepare the
    DUT for lab deployment. It does not contain any reference to AFE / Skylab so
    that DUT preparation is guaranteed to be isolated from the scheduling
    infrastructure.

    The host is closed when the context exits, including when preparing the
    servo fails before the host could be yielded.

    @param hostname:        FQDN of the host to prepare.
    @param board:           The autotest board label for the DUT.
    @param model:           The autotest model label for the DUT.
    @param servo_hostname:  FQDN of the servo host controlling the DUT.
    @param servo_port:      Servo host port used for the controlling servo.
    @param servo_serial:    (Optional) Serial number of the controlling servo.
    @param logs_dir:        (Optional) Directory to save logs obtained from the
                            host.

    @yield a server.hosts.Host object.
    """
    labels = [
            'board:%s' % board,
            'model:%s' % model,
    ]
    attributes = {
            servo_constants.SERVO_HOST_ATTR: servo_hostname,
            servo_constants.SERVO_PORT_ATTR: servo_port,
    }
    if servo_serial is not None:
        attributes[servo_constants.SERVO_SERIAL_ATTR] = servo_serial

    # The inventory data lives only in memory, so nothing is read from or
    # written to the scheduling infrastructure.
    store = host_info.InMemoryHostInfoStore(info=host_info.HostInfo(
            labels=labels,
            attributes=attributes,
    ))
    machine_dict = _get_machine_dict(hostname, store)
    host = hosts.create_host(machine_dict)
    # Everything after the host exists runs inside the try block so that the
    # host is closed even if creating or preparing the servo host raises.
    try:
        servohost = servo_host.ServoHost(
                **servo_host.get_servo_args_for_host(host))
        _prepare_servo(servohost)
        host.set_servo_host(servohost)
        host.servo.uart_logs_dir = logs_dir
        yield host
    finally:
        host.close()
86
87
def _get_machine_dict(hostname, host_info_store):
    """Build the machine description that hosts.create_host consumes.

    @param hostname         Hostname of the DUT.
    @param host_info_store  Host info store to attach to the DUT.

    @return A dict with the keys 'hostname', 'host_info_store' and
            'afe_host'; 'afe_host' is a new server_utils.EmptyAFEHost().
    """
    machine = dict(hostname=hostname, host_info_store=host_info_store)
    machine['afe_host'] = server_utils.EmptyAFEHost()
    return machine
100
101
def download_image_to_servo_usb(host, build):
    """Stage the given build and write it to the USB on the host's servo.

    @param host   A server.hosts.Host object.
    @param build  A Chrome OS version string for the build to download.
    """
    staged = host.stage_image_for_servo(build)
    # Staging returns a pair; only its second item (the update URL) is used.
    _, image_url = staged
    host.servo.image_to_servo_usb(image_url)
110
111
def try_reset_by_servo(host):
    """Cold-reset the DUT through its servo and wait for it to come back.

    The cold reset corresponds to
    `dut-control -p <SERVO-PORT> power_state:reset`.

    @param host: CrosHost instance with initialized servo instance.

    @raise error.AutoservError: if the DUT is not up within
                                host.BOOT_TIMEOUT seconds.
    """
    logging.info('Attempting reset via servo...')
    power_controller = host.servo.get_power_state_controller()
    power_controller.reset()

    logging.info('Waiting for DUT to come back up.')
    if host.wait_up(timeout=host.BOOT_TIMEOUT):
        return
    raise error.AutoservError(
        'DUT failed to come back after %d seconds' % host.BOOT_TIMEOUT)
127
128
def power_cycle_via_servo(host, recover_src=False):
    """Power cycle a host through its attached servo.

    The DUT is first asked to halt over ssh (best effort), then powered off
    and back on through the servo power state controller.

    @param host: A server.hosts.Host object.
    @param recover_src: Indicate if we need switch servo_v4_role
           back to src mode.

    @raise error.AutoservError: if the DUT is not up within
                                host.BOOT_TIMEOUT seconds after power on.
    """
    try:
        logging.info('Shutting down %s from via ssh.', host.hostname)
        host.halt()
    except Exception as err:
        # Best effort only: the servo power-off below does not depend on a
        # clean shutdown from the DUT side.
        logging.info('Unable to shutdown DUT via ssh; %s', str(err))

    if recover_src:
        host.servo.set_servo_v4_role('src')

    logging.info('Power cycling DUT through servo...')
    host.servo.get_power_state_controller().power_off()
    host.servo.switch_usbkey('off')
    time.sleep(host.SHUTDOWN_TIMEOUT)
    # N.B. The Servo API requires power_on() (and nothing else) here:
    #  1) A DUT that was turned on in recovery mode has to be turned off
    #     and then on again with power_on() to leave recovery mode
    #     (Parrot specific requirement).
    #  2) Once power_off() was used, power_on() is the only way to turn
    #     the DUT on (Storm specific requirement).
    time.sleep(host.SHUTDOWN_TIMEOUT)
    host.servo.get_power_state_controller().power_on()

    logging.info('Waiting for DUT to come back up.')
    if host.wait_up(timeout=host.BOOT_TIMEOUT):
        return
    raise error.AutoservError('DUT failed to come back after %d seconds' %
                              host.BOOT_TIMEOUT)
164
165
def verify_battery_status(host):
    """Verify the battery status and charge level of the DUT.

    If DUT battery still in the factory mode then DUT required re-work.

    DUTs whose 'power' label is not 'battery' are skipped. Otherwise the
    battery status file is read and the battery level is polled up to
    _BATTERY_LEVEL_CHECK_RETRIES times, sleeping
    _BATTERY_LEVEL_CHECK_RETRIES_TIMEOUT seconds between checks, until it
    reaches cros_constants.MIN_BATTERY_LEVEL.

    @param host server.hosts.CrosHost object.
    @raise Exception: if the battery status has an unexpected value, or if
                      the battery does not reach the required level.
    """
    logging.info("Started to verify battery status")
    info = host.host_info_store.get()
    if info.get_label_value('power') != 'battery':
        logging.info("Skepping due DUT does not have the battery")
        return
    power_info = host.get_power_supply_info()
    battery_path = power_info['Battery']['path']
    cmd = 'cat %s/status' % battery_path
    status = host.run(cmd, timeout=30, ignore_status=True).stdout.strip()
    if status not in ('Charging', 'Discharging', 'Full'):
        raise Exception(
                'Unexpected battery status. Please verify that DUT prepared'
                ' for deployment.')

    # Verify battery level to avoid cases when DUT in factory mode which can
    # block battery from charging. The level is checked up to 8 times with
    # 15 minutes between checks to allow battery to reach required level.
    battery_level_good = False
    # None means "no previous reading yet"; a real reading of 0% must still
    # take part in the stall detection below.
    last_battery_level = None
    last_attempt = _BATTERY_LEVEL_CHECK_RETRIES - 1
    for attempt in range(_BATTERY_LEVEL_CHECK_RETRIES):
        power_info = host.get_power_supply_info()
        battery_level = float(power_info['Battery']['percentage'])
        # Verify if battery reached the required level
        battery_level_good = battery_level >= cros_constants.MIN_BATTERY_LEVEL
        if battery_level_good:
            # Stop retry as battery reached the required level
            break

        if last_battery_level is not None:
            # If level of battery is changing less than 4% per 15 minutes
            # then we can assume that the battery is not charging as expected
            # or stuck on some level. This is decided before sleeping so a
            # stalled battery does not cost one more wait period.
            battery_level_change = abs(last_battery_level - battery_level)
            if battery_level_change < _BATTERY_LEVEL_CHANGE_IN_ONE_RETRY:
                logging.info(
                        'Battery charged less than 4%% for 15 minutes which'
                        ' means that something wrong with charging.'
                        ' Stop retry to charge it. Battery level: %s%%',
                        battery_level)
                break

        if attempt == last_attempt:
            # No further check follows, so sleeping would only delay the
            # failure report.
            break

        logging.info(
                'Battery level %s%% is lower than expected %s%%.'
                ' Sleep for %s seconds to try again', battery_level,
                cros_constants.MIN_BATTERY_LEVEL,
                _BATTERY_LEVEL_CHECK_RETRIES_TIMEOUT)
        time.sleep(_BATTERY_LEVEL_CHECK_RETRIES_TIMEOUT)
        last_battery_level = battery_level
    if not battery_level_good:
        raise Exception(
                'Battery is not charged or discharging.'
                ' Please verify that DUT connected to power and charging.'
                ' Possible that the DUT is not ready for deployment in lab.')
    logging.info("Battery status verification passed!")
227
228
def verify_servo(host):
    """Verify that the DUT has a good, working servo.

    The servo_topology and servo_type will be clean up when initiate the
    deploy process by run add-dut or update-dut.

    Labstations are skipped because they do not have a servo.

    @param host server.hosts.CrosHost object.
    @raise Exception: if the servo host is missing, the servo topology is
                      supported but empty, the servo type cannot be read, or
                      the servo is not in working state.
    """
    if host.host_info_store.get().os == 'labstation':
        # skip labstation because they do not have servo
        return

    servohost = host._servo_host
    if not servohost:
        raise Exception('Servo host is not initialized. All DUTs need to have'
                        ' a stable and working servo.')

    if servohost.is_servo_topology_supported():
        topology = servohost.get_topology()
        if not topology or topology.is_empty():
            raise Exception(
                    'Servo topology is not initialized. All DUTs need to have'
                    ' a stable and working servo.')

    if not host.servo.get_servo_type():
        raise Exception(
                'The servo_type did not received from Servo. Please verify'
                ' that Servo is in good state. All DUTs need to have a stable'
                ' and working servo.')

    if not host.is_servo_in_working_state():
        raise Exception(
                'Servo is not initialized properly or did not passed one or'
                ' more verifiers. All DUTs need to have a stable and working'
                ' servo.')

    host._set_servo_topology()
    logging.info("Servo initialized and working as expected.")
262
263
def verify_ccd_testlab_enable(host):
    """Verify that ccd testlab is enabled when the DUT is driven over CCD.

    The new deploy process requires DUTs connected to the servo by type-c to
    be deployed with testlab enabled, so that communication through the
    servo is permanent; this is critical for auto-repair capability.

    Labstations and DUTs whose main servo device is not 'ccd_cr50' are
    skipped.

    @param host server.hosts.CrosHost object.
    @raise Exception: if the servo has no 'cr50_testlab' control or the
                      control does not report 'on'.
    """
    if host.host_info_store.get().os == 'labstation':
        # skip labstation because they do not has servo
        return

    servo = host.servo
    # Only verify for ccd servo connection
    if not servo or servo.get_main_servo_device() != 'ccd_cr50':
        return

    if not servo.has_control('cr50_testlab'):
        raise Exception(
            'CCD connection required support of cr50 on the DUT. Please '
            'verify which servo need to be used for DUT setup.')

    if servo.get('cr50_testlab') != 'on':
        raise Exception(
            'CCD testlab mode is not enabled on the DUT, enable '
            'testlab mode is required for all DUTs that support CR50.')

    logging.info("CCD testlab mode is enabled on the DUT.")
293
294
def verify_labstation_RPM_config_unsafe(host):
    """Power cycle a labstation through its RPM to validate the RPM info.

    The check is meant to catch inaccurate RPM information at deploy time:
    if the RPM config is wrong, the power cycle fails. A host with neither
    'powerunit_hostname' nor 'powerunit_outlet' is skipped.

    Note that servod is not cleanly stopped as part of power-cycling the
    DUT; therefore calling this function is not safe in general.

    @param host: any host

    @raise Exception: if only one of the two RPM attributes is set.
    """
    attributes = host.host_info_store.get().attributes
    rpm_hostname = attributes.get('powerunit_hostname')
    rpm_outlet = attributes.get('powerunit_outlet')

    has_hostname = bool(rpm_hostname)
    has_outlet = bool(rpm_outlet)

    if not has_hostname and not has_outlet:
        logging.info("intentionally skipping labstation %s", host.hostname)
        return

    if has_hostname != has_outlet:
        # Exactly one of the two values is present.
        msg = "inconsistent power info: %s %s" % (rpm_hostname, rpm_outlet)
        logging.error(msg)
        raise Exception(msg)

    logging.info("Shutting down labstation...")
    host.rpm_power_off_and_wait()
    host.rpm_power_on_and_wait()
    logging.info("RPM Check Successful")
333
334
def verify_boot_into_rec_mode(host):
    """Verify that we can boot into USB when in recover mode, and reset tpm.

    The new deploy process will install test image before firmware update, so
    we don't need boot into recovery mode during deploy, but we still want to
    make sure that DUT can boot into recover mode as it's critical for
    auto-repair capability.

    After the check the DUT is power cycled through the servo to bring it
    back to normal mode.

    @param host   servers.host.Host object.
    @raise Exception: if the DUT is not up within host.USB_BOOT_TIMEOUT
                      seconds after being booted in recovery mode.
    """
    try:
        # The DUT could be start with un-sshable state, so do shutdown from
        # DUT side in a try block.
        logging.info('Shutting down %s from via ssh.', host.hostname)
        host.halt()
    except Exception as e:
        logging.info('Unable to shutdown DUT via ssh; %s', str(e))

    host.servo.get_power_state_controller().power_off()
    time.sleep(host.SHUTDOWN_TIMEOUT)
    logging.info("Booting DUT into recovery mode...")
    need_snk = host.require_snk_mode_in_recovery()
    host.servo.boot_in_recovery_mode(snk_mode=need_snk)
    try:
        if not host.wait_up(timeout=host.USB_BOOT_TIMEOUT):
            raise Exception('DUT failed to boot into recovery mode.')

        logging.info('Resetting the TPM status')
        try:
            host.run('chromeos-tpm-recovery')
        except error.AutoservRunError:
            # A failing TPM recovery command is tolerated and only logged.
            # logging.warning() is the supported spelling; logging.warn()
            # is a deprecated alias.
            logging.warning('chromeos-tpm-recovery is too old.')
    except Exception:
        # Restore the servo_v4 role to src if we called boot_in_recovery_mode
        # method with snk_mode=True earlier. If no exception raise, recover
        # src mode will be handled by power_cycle_via_servo() method.
        if need_snk:
            host.servo.set_servo_v4_role('src')
        raise

    logging.info("Rebooting host into normal mode.")
    power_cycle_via_servo(host, recover_src=need_snk)
    logging.info("Verify boot into recovery mode completed successfully.")
378
379
def install_test_image(host):
    """Boot the DUT from the servo USB key and install the test image.

    This function assumes that the required image is already downloaded onto
    the USB key connected to the DUT via servo, and the DUT is in dev mode
    with dev_boot_usb enabled.

    @param host   servers.host.Host object.
    @raise Exception: if the DUT does not answer ping within
                      host.BOOT_TIMEOUT seconds after power on.
    """
    servo = host.servo
    # Power the DUT off, expose the USB key to it and power it back on.
    # The firmware needs time to show the dev-mode screen, and some boards
    # are slow to become ready for ctrl+U after power on.
    servo.get_power_state_controller().power_off()
    time.sleep(host.SHUTDOWN_TIMEOUT)
    servo.switch_usbkey('dut')
    servo.get_power_state_controller().power_on()

    # Keep sending ctrl+U until the DUT answers ping or BOOT_TIMEOUT passes.
    boot_deadline = time.time() + host.BOOT_TIMEOUT
    booted = False
    while not booted and time.time() < boot_deadline:
        logging.info("Pressing ctrl+u")
        servo.ctrl_u()
        booted = host.ping_wait_up(timeout=5)
    if not booted:
        raise Exception('DUT failed to boot from USB for install test image.')

    host.run('chromeos-install --yes', timeout=host.INSTALL_TIMEOUT)

    logging.info("Rebooting DUT to boot from hard drive.")
    try:
        host.reboot()
    except Exception as err:
        # Fall back to a servo cold reset when the ssh reboot fails.
        logging.info('Failed to reboot DUT via ssh; %s', str(err))
        try_reset_by_servo(host)
    logging.info("Install test image completed successfully.")
417
418
def reinstall_test_image(host):
    """Reinstall the test image on the DUT through its servo.

    This is a thin wrapper around host.servo_install().

    This function assumes that the required image is already downloaded onto the
    USB key connected to the DUT via servo.

    @param host   servers.host.Host object.
    """
    host.servo_install()
428
429
def flash_firmware_using_servo(host, build):
    """Flash DUT firmware directly using servo.

    Rather than running `chromeos-firmwareupdate` on DUT, we can flash DUT
    firmware directly using servo (run command `flashrom`, etc. on servo). In
    this way, we don't require DUT to be in dev mode and with dev_boot_usb
    enabled.

    This is a thin wrapper around host.firmware_install(build).

    @param host   servers.host.Host object.
    @param build  Build passed to host.firmware_install(); presumably a
                  Chrome OS version string as in the other helpers of this
                  module (TODO confirm).
    """
    host.firmware_install(build)
438
439
def install_firmware(host):
    """Install firmware on the DUT and switch it back to non-dev mode.

    Steps performed on the DUT:
      1. Disable software write-protect for the 'host' and 'ec' flashrom
         programmers (failures are ignored).
      2. Run the firmware update in the background, wait for it to finish
         and check its log for success.
      3. Reboot, then reset the GBB flags and request that dev mode be
         disabled (failures of these two commands are ignored).
      4. Reboot once more.

    Whenever a reboot over ssh fails, the DUT is reset through the servo.

    @param host   Host instance to use for servo and ssh operations.
    """
    logging.info("Started install firmware on the DUT.")
    # Disable software-controlled write-protect for both FPROMs before the
    # firmware is installed.
    for programmer in ('host', 'ec'):
        host.run('flashrom -p %s --wp-disable' % programmer,
                 ignore_status=True)

    update_log = '/mnt/stateful_partition/home/root/cros-fw-update.log'
    update_pid = _start_firmware_update(host, update_log)
    _wait_firmware_update_process(host, update_pid)
    _check_firmware_update_result(host, update_log)

    _reboot_with_servo_fallback(
            host, 'Failed to reboot the DUT after update firmware; %s')

    # The DUT booted with the new firmware, so leave dev-mode and clear the
    # GBB flags. GBB flags are non-zero because boot from USB was enabled.
    logging.info("Resting gbb flags and disable dev mode.")
    for command in ('/usr/share/vboot/bin/set_gbb_flags.sh 0',
                    'crossystem disable_dev_request=1'):
        host.run(command, ignore_status=True)

    logging.info("Rebooting DUT in normal mode(non-dev).")
    _reboot_with_servo_fallback(
            host,
            'Failed to reboot the DUT after switch to non-dev mode; %s')
    logging.info("Install firmware completed successfully.")


def _reboot_with_servo_fallback(host, failure_format):
    """Reboot the DUT over ssh; on failure log it and reset via servo.

    @param host            Host instance to use for servo and ssh operations.
    @param failure_format  Logging format string with a single %s
                           placeholder that receives the caught exception.
    """
    try:
        host.reboot()
    except Exception as err:
        logging.debug(failure_format, err)
        try_reset_by_servo(host)
491
492
def _start_firmware_update(host, result_file):
    """Start `chromeos-firmwareupdate` on the DUT as a background job.

    In scenario servo v4 type C, some boards of DUT may lose ethernet
    connectivity on firmware update. There's no way to bring it back except
    rebooting the system.

    The background command writes the current date to result_file, appends
    the output of the firmware updater to it, and finally runs
    /usr/local/bin/hooks/check_ethernet.hook.

    @param host         Host instance to use for servo and ssh operations.
    @param result_file  Path on DUT to save operation logs.

    @returns The process id.
    """
    # TODO(guocb): Use `make_dev_firmware` to re-sign from MP to test/dev.
    updater = 'chromeos-firmwareupdate --mode=factory --force'

    steps = (
        "date > %s" % result_file,
        "nohup %s &>> %s" % (updater, result_file),
        "/usr/local/bin/hooks/check_ethernet.hook",
    )
    return host.run_background(';'.join(steps))
513
514
def _wait_firmware_update_process(host, pid, timeout=_FIRMWARE_UPDATE_TIMEOUT):
    """Wait `chromeos-firmwareupdate` to finish.

    `ps -f -p <pid>` is polled on the DUT every 10 seconds for up to
    `timeout` seconds. If running the command raises
    error.AutoservRunError, the DUT is assumed to be rebooting and we wait
    for it to come back up instead.

    @param host     Host instance to use for servo and ssh operations.
    @param pid      The process ID of `chromeos-firmwareupdate`.
    @param timeout  Maximum time to wait for firmware updating.

    @raise Exception: if the process does not complete within `timeout`
                      seconds, or if the DUT does not come back up within
                      host.USB_BOOT_TIMEOUT seconds after the connection
                      was lost.
    """
    try:
        utils.poll_for_condition(
            # Exit status 0 (process still listed) is falsy, so polling
            # continues while the updater is running.
            lambda: host.run('ps -f -p %s' % pid, timeout=20).exit_status,
            exception=Exception(
                    "chromeos-firmwareupdate (pid: %s) didn't complete in %s "
                    'seconds.' % (pid, timeout)),
            # Honor the caller supplied timeout; it is also the value
            # reported in the exception message above.
            timeout=timeout,
            sleep_interval=10,
        )
    except error.AutoservRunError:
        # We lose the connectivity, so the DUT should be booting up.
        if not host.wait_up(timeout=host.USB_BOOT_TIMEOUT):
            raise Exception(
                    'DUT failed to boot up after firmware updating.')
536
537
def _check_firmware_update_result(host, result_file):
    """Check if firmware updating is good or not.

    The update is considered successful only when the last line of the log
    (ignoring trailing whitespace) is the updater's success message.

    @param host         Host instance to use for servo and ssh operations.
    @param result_file  Path of the file saving output of
                        `chromeos-firmwareupdate`.

    @raise Exception: if the last line of the log is not the success
                      message, including when the log is empty or has a
                      single line.
    """
    fw_update_was_good = ">> DONE: Firmware updater exits successfully."
    result = host.run('cat %s' % result_file)
    # Index -1 (not 1) so that a log without any newline yields its only
    # line instead of raising IndexError.
    last_line = result.stdout.rstrip().rsplit('\n', 1)[-1]
    if last_line != fw_update_was_good:
        raise Exception("chromeos-firmwareupdate failed!")
549
550
def _prepare_servo(servohost):
    """Restart servod on the servo host and check that a USB stick exists.

    @param servohost  A server.hosts.servo_host.ServoHost object.
    @raise Exception: if no USB stick is detected on the servo host.
    """
    # `servod` is stopped on purpose so that `repair()` has to restart it:
    #   + `servod` caches knowledge about the image on the USB stick, and
    #     clearing that cache forces the USB stick to be re-imaged
    #     unconditionally.
    #   + If `servod` has a problem that verify and repair cannot find,
    #     this provides a UI through which `servod` can be restarted.
    stop_servod = 'stop servod PORT=%d' % servohost.servo_port
    servohost.run(stop_servod, ignore_status=True)
    servohost.repair()

    servo = servohost.get_servo()
    if servo.probe_host_usb_dev():
        return
    raise Exception('No USB stick detected on Servo host')
570
571
def setup_hwid_and_serialnumber(host):
    """Read HWID and serial number from the DUT into its host info store.

    The values are read with `crossystem hwid` and `vpd -g serial_number`,
    stripped of surrounding whitespace, and stored in the 'HWID' and
    'serial_number' attributes. The host info is committed only when it
    differs from what the store currently holds.

    @param host    servers.host.Host object.
    @raise Exception: if the host has no host_info_store, or if the HWID
                      and/or the serial number could not be retrieved.
    """
    if not hasattr(host, 'host_info_store'):
        raise Exception('%s does not have host_info_store' % host.hostname)

    info = host.host_info_store.get()
    # Strip the output so that whitespace-only output counts as missing and
    # no trailing newline ends up in the stored attributes.
    hwid = host.run('crossystem hwid', ignore_status=True).stdout.strip()
    serial_number = host.run('vpd -g serial_number',
                             ignore_status=True).stdout.strip()

    if not hwid and not serial_number:
        raise Exception(
                'Failed to retrieve HWID and SerialNumber from host %s' %
                host.hostname)
    if not serial_number:
        raise Exception('Failed to retrieve SerialNumber from host %s' %
                        host.hostname)
    if not hwid:
        raise Exception('Failed to retrieve HWID from host %s' % host.hostname)

    info.attributes['HWID'] = hwid
    info.attributes['serial_number'] = serial_number
    if info != host.host_info_store.get():
        host.host_info_store.commit(info)
    logging.info("Reading HWID and SerialNumber completed successfully.")
599