1# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import json
6import logging
7import os
8import time
9
10import common
11from autotest_lib.client.common_lib import error
12from autotest_lib.client.common_lib import global_config
13from autotest_lib.client.common_lib import hosts
14from autotest_lib.client.common_lib.cros import dev_server
15from autotest_lib.client.common_lib.cros import retry
16from autotest_lib.server import afe_utils
17from autotest_lib.server import crashcollect
18from autotest_lib.server.cros import autoupdater
19from autotest_lib.server.cros.dynamic_suite import tools
20from autotest_lib.server.hosts import cros_firmware
21from autotest_lib.server.hosts import repair_utils
22
# _DEV_MODE_ALLOWED_POOLS - The set of pools that are allowed to be
# in dev mode (usually, those should be unmanaged devices).  The value
# is read from the 'pools_dev_mode_allowed' setting in the 'CROS'
# config section and split on commas.
#
# N.B. If the configured value is the empty string (the default),
# splitting it yields a set holding one empty string, not an empty set.
#
_DEV_MODE_ALLOWED_POOLS = set(
    global_config.global_config.get_config_value(
            'CROS',
            'pools_dev_mode_allowed',
            type=str,
            default='',
            allow_blank=True).split(','))

# Setting to suppress dev mode check; primarily used for moblab where all
# DUT's are in dev mode.  Read from the 'dev_mode_allowed' setting in the
# 'CROS' config section; defaults to False.
_DEV_MODE_ALWAYS_ALLOWED = global_config.global_config.get_config_value(
            'CROS',
            'dev_mode_allowed',
            type=bool,
            default=False)
41
# Triggers for the 'au', 'powerwash', and 'usb' repair actions.
# Each entry is the tag of a verifier in the verify DAG.
# These are also used as dependencies in the `CrosHost` repair
# sequence, as follows:
#
# au:
#   - triggers: _CROS_AU_TRIGGERS
#   - depends on: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS
#
# powerwash:
#   - triggers: _CROS_POWERWASH_TRIGGERS + _CROS_AU_TRIGGERS
#   - depends on: _CROS_USB_TRIGGERS
#
# usb:
#   - triggers: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS +
#               _CROS_AU_TRIGGERS
#   - no dependencies
#
# N.B. AC power detection depends on software on the DUT, and there
# have been bugs where detection failed even though the DUT really
# did have power.  So, we make the 'power' verifier a trigger for
# reinstall repair actions, too.
#
# TODO(jrbarnette):  AU repair can't fix all problems reported by
# the 'cros' verifier; it's listed as an AU trigger as a
# simplification.  The ultimate fix is to split the 'cros' verifier
# into smaller individual verifiers.
_CROS_AU_TRIGGERS = ('power', 'rwfw', 'python', 'cros',)
_CROS_POWERWASH_TRIGGERS = ('tpm', 'good_au', 'ext4',)
_CROS_USB_TRIGGERS = ('ssh', 'writable',)
71
72
class ACPowerVerifier(hosts.Verifier):
    """Verify the DUT is on AC power and its battery is adequately charged."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            supply = host.get_power_supply_info()
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Failed to get power supply info')

        # An absent 'Line Power' section or 'online' field means the AC
        # state is unknown; that is reported as a failure.
        try:
            ac_online = supply['Line Power']['online']
        except KeyError:
            raise hosts.AutoservVerifyError(
                    'Cannot determine AC power status')
        if ac_online != 'yes':
            raise hosts.AutoservVerifyError(
                    'AC power is not plugged in')

        # Absent battery data is tolerated:  the check is skipped.
        try:
            charge = supply['Battery']['percentage']
        except KeyError:
            logging.info('Cannot determine battery status - '
                         'skipping check.')
            return
        if float(charge) < 50.0:
            raise hosts.AutoservVerifyError(
                    'Battery is less than 50%')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The DUT is plugged in to AC power'
104
105
class WritableVerifier(hosts.Verifier):
    """
    Check that the stateful file systems accept writes.

    Linux commonly reacts to unexpected file system errors (hardware
    errors in block devices included) by remounting the file system
    read-only.  This verifier detects that condition.

    Two file systems must be writable for critical operations such as
    AU, and both are probed:
      * The (unencrypted) stateful system, which includes
        /mnt/stateful_partition.
      * The encrypted stateful partition, which includes /var.

    Bind mounts are not probed; they are expected to fail the same way
    as their underlying main mounts.  Whether the Linux kernel can
    guarantee that is untested...
    """

    # N.B. The order is significant:  encrypted stateful is loop-mounted
    # from a file in unencrypted stateful, so once unencrypted stateful
    # fails there is no point in probing encrypted stateful.
    _TEST_DIRECTORIES = ['/mnt/stateful_partition', '/var/tmp']

    def verify(self, host):
        # pylint: disable=missing-docstring
        # Raising on the first failing directory is deliberate; see the
        # note on _TEST_DIRECTORIES.
        for directory in self._TEST_DIRECTORIES:
            probe = os.path.join(directory, 'writable_test')
            result = host.run(command='touch %s && rm %s' % (probe, probe),
                              ignore_status=True)
            if result.exit_status == 0:
                continue
            raise hosts.AutoservVerifyError(
                    "Can't create a file in %s" % directory)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The stateful filesystems are writable'
146
147
class EXT4fsErrorVerifier(hosts.Verifier):
    """
    Confirm we have not seen critical file system kernel errors.

    Two patterns are searched for in the `dmesg` output on the DUT:
      * "EXT4-fs error (device <dev>):", where <dev> is looked up on
        the DUT from the mount entry for /mnt/stateful_partition.
      * "This should not happen!!  Data will be lost".
    """

    def verify(self, host):
        # pylint: disable=missing-docstring
        # grep for stateful FS errors of the type "EXT4-fs error (device sda1):"
        # The backslashes are doubled so that grep receives a literal
        # "\(" / "\)" without relying on an invalid Python escape.
        command = ('dmesg | grep -E "EXT4-fs error \\(device '
                   "$(cut -d ' ' -f 5,9 /proc/$$/mountinfo | "
                   "grep -e '^/mnt/stateful_partition ' | "
                   "cut -d ' ' -f 2 | cut -d '/' -f 3)\\):\"")
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            sample = output.splitlines()[0]
            message = 'Saw file system error: %s' % sample
            raise hosts.AutoservVerifyError(message)
        # Check for other critical FS errors.
        command = 'dmesg | grep "This should not happen!!  Data will be lost"'
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            message = 'Saw file system error: Data will be lost'
            raise hosts.AutoservVerifyError(message)
        # No match from either search means the check passed.  This
        # path used to log 'Could not determine stateful mount.' as an
        # error, which was misleading:  nothing here tests whether the
        # mount lookup succeeded.

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Did not find critical file system errors'
177
178
class UpdateSuccessVerifier(hosts.Verifier):
    """
    Check that the most recent provision job on the DUT completed.

    Every update (e.g. for a Provision job) begins by creating a marker
    file named `PROVISION_FAILED`.  The marker lives in a part of the
    stateful partition that is removed when an update finishes
    successfully, so a marker that is still present means a prior
    update failed.

    Verification fails if the marker file exists.
    """
    def verify(self, host):
        # pylint: disable=missing-docstring
        marker_test = 'test -f %s' % autoupdater.PROVISION_FAILED
        # `test -f` exits 0 when the marker file is present.
        if host.run(marker_test, ignore_status=True).exit_status == 0:
            raise hosts.AutoservVerifyError(
                    'Last AU on this DUT failed')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The most recent AU attempt on this DUT succeeded'
204
205
class TPMStatusVerifier(hosts.Verifier):
    """
    Verify that the host's TPM is in a good state.

    The check reads the 'tpm' section of the cryptohome status and
    fails if the TPM is disabled, cannot be connected to, or cannot
    load the SRK or its public key.  The check is skipped on virtual
    machines and when the status cannot be parsed or lacks a field.
    """

    def verify(self, host):
        # pylint: disable=missing-docstring
        if _is_virtual_machine(host):
            # We do not forward host TPM / emulated TPM to qemu VMs, so skip
            # this verification step.
            logging.debug('Skipped verification %s on VM', self)
            return

        try:
            status = CryptohomeStatus(host)
        except hosts.AutoservVerifyError:
            logging.info('Cannot determine the Cryptohome valid status - '
                         'skipping check.')
            return
        try:
            tpm = status['tpm']
            if not tpm['enabled']:
                raise hosts.AutoservVerifyError(
                        'TPM is not enabled -- Hardware is not working.')
            if not tpm['can_connect']:
                raise hosts.AutoservVerifyError(
                        ('TPM connect failed -- '
                         'last_error=%d.' % tpm['last_error']))
            if tpm['owned'] and not tpm['can_load_srk']:
                raise hosts.AutoservVerifyError(
                        'Cannot load the TPM SRK')
            if tpm['can_load_srk'] and not tpm['can_load_srk_pubkey']:
                raise hosts.AutoservVerifyError(
                        'Cannot load the TPM SRK public key')
        except KeyError:
            # A missing field means the status can't be evaluated; the
            # message matches the one logged above (it used to be
            # misspelled 'Crytohome' here).
            logging.info('Cannot determine the Cryptohome valid status - '
                         'skipping check.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host\'s TPM is available and working'
246
247
class PythonVerifier(hosts.Verifier):
    """Check that the DUT has a Python interpreter that can run."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        probe = host.run('python -c "import cPickle"',
                         ignore_status=True)
        if probe.exit_status == 0:
            return

        message = 'The python interpreter is broken'
        # Exit status 127 is followed up with `which python` to tell a
        # missing interpreter apart from a broken one.
        if probe.exit_status == 127:
            lookup = host.run('which python', ignore_status=True)
            if not (lookup.exit_status == 0 and lookup.stdout):
                message = ('Python is missing; may be caused by '
                           'powerwash')
        raise hosts.AutoservVerifyError(message)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Python on the host is installed and working'
268
269
class DevModeVerifier(hosts.Verifier):
    """Check that the host has not booted in dev mode."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        info = host.host_info_store.get()
        # The check is waived globally, or for hosts in an allowed pool.
        if _DEV_MODE_ALWAYS_ALLOWED:
            return
        if info.pools & _DEV_MODE_ALLOWED_POOLS:
            return

        devsw_boot = host.run('crossystem devsw_boot',
                              ignore_status=True).stdout
        if devsw_boot == '0':
            return
        raise hosts.AutoservVerifyError('The host is in dev mode')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should not be in dev mode'
289
290
class HWIDVerifier(hosts.Verifier):
    """
    Record the host's HWID & serial number in its host info.

    The values are read from the DUT with `crossystem hwid` and
    `vpd -g serial_number`; non-empty results are stored as the 'HWID'
    and 'serial_number' host info attributes.  Any failure is logged
    and swallowed, so this verifier never fails verification.
    """

    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            info = host.host_info_store.get()

            hwid = host.run('crossystem hwid', ignore_status=True).stdout
            if hwid:
                info.attributes['HWID'] = hwid

            serial_number = host.run('vpd -g serial_number',
                                     ignore_status=True).stdout
            if serial_number:
                info.attributes['serial_number'] = serial_number

            # Commit only if the info differs from what is stored.
            if info != host.host_info_store.get():
                host.host_info_store.commit(info)
        except Exception as e:
            # The two string literals must form ONE format string:  a
            # comma between them used to turn '%s: %s' into a logging
            # argument, which broke formatting of this message.
            logging.exception('Failed to get HWID & Serial Number for host '
                              '%s: %s', host.hostname, str(e))

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should have valid HWID and Serial Number'
318
319
class JetstreamTpmVerifier(hosts.Verifier):
    """Check that the Jetstream TPM is enabled, owned, usable and ready."""

    # (CryptohomeStatus property, failure message) pairs, checked in
    # order; the first property that is false fails verification.
    _STATUS_CHECKS = (
        ('tpm_enabled', 'TPM is not enabled'),
        ('tpm_owned', 'TPM is not owned'),
        ('tpm_can_load_srk', 'TPM cannot load SRK'),
        ('tpm_can_load_srk_pubkey', 'TPM cannot load SRK pubkey'),
    )

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            status = CryptohomeStatus(host)
            for prop, failure in self._STATUS_CHECKS:
                if not getattr(status, prop):
                    raise hosts.AutoservVerifyError(failure)

            # Full initialization is confirmed separately.  This
            # command prints line-oriented property/value pairs.
            tpm_status = host.run('cryptohome --action=tpm_status')
            if 'TPM Ready: true' not in tpm_status.stdout:
                raise hosts.AutoservVerifyError('TPM is not ready')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Could not determine TPM status')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream TPM state check'
350
351
class JetstreamAttestationVerifier(hosts.Verifier):
    """Check that the Jetstream attestation client holds a certificate."""

    # (command, text required in its stdout, failure message) triples,
    # run in order.  The first command's output is in text protobuf
    # format.
    _CHECKS = (
        ('cryptohome --action=tpm_more_status',
         'attestation_prepared: true',
         'Attestation has not been prepared'),
        ('cryptohome --action=tpm_attestation_get_ek',
         'EK Certificate',
         'Endorsement certificate not found'),
    )

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            for command, marker, failure in self._CHECKS:
                if marker not in host.run(command).stdout:
                    raise hosts.AutoservVerifyError(failure)
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Unable to fetch endorsement certificate')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream attestation endorsement check'
377
378
class JetstreamServicesVerifier(hosts.Verifier):
    """Check that the Jetstream 'ap-controller' service is up."""

    # Retry for b/62576902
    @retry.retry(error.AutoservError, timeout_min=1, delay_sec=10)
    def verify(self, host):
        # pylint: disable=missing-docstring
        # First the upstart job status...
        try:
            job_running = host.upstart_status('ap-controller')
            if not job_running:
                raise hosts.AutoservVerifyError(
                    'ap-controller service is not running')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                'ap-controller service not found')

        # ...then the process itself.
        try:
            host.run('pgrep ap-controller')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                'ap-controller process is not running')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream services must be running'
404
405
class KvmExistsVerifier(hosts.Verifier):
    """Check that /dev/kvm is present when vm_concierge is installed."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        # The shell test succeeds only when /dev/kvm does not exist
        # while /usr/bin/vm_concierge is a regular file.
        probe = host.run('[ ! -e /dev/kvm -a -f /usr/bin/vm_concierge ]',
                         ignore_status=True)
        kvm_missing = probe.exit_status == 0
        if kvm_missing:
            raise hosts.AutoservVerifyError('/dev/kvm is missing')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return '/dev/kvm should exist if device supports Linux VMs'
420
421
class _ResetRepairAction(hosts.RepairAction):
    """Shared post-reset handling for repair actions that reset a DUT."""

    def _collect_logs(self, host):
        """Gather /var/log and crash info from a DUT that came back up."""
        log_dir = crashcollect.get_crashinfo_dir(host,
                                                 'after_%s' % self.tag)
        host.collect_logs('/var/log', log_dir, ignore_errors=True)
        # Collect crash info.
        crashcollect.get_crashinfo(host, None)

    def _check_reset_success(self, host):
        """
        Wait for the DUT to boot; gather logs if it does.

        @raises AutoservRepairError: if the DUT does not come up within
            `host.BOOT_TIMEOUT`.
        """
        if not host.wait_up(host.BOOT_TIMEOUT):
            raise hosts.AutoservRepairError(
                    'Host %s is still offline after %s.' %
                    (host.hostname, self.tag),
                    'failed_to_boot_after_' + self.tag)
        try:
            # Grab the logs as soon as ssh access is back, before they
            # get clobbered.
            self._collect_logs(host)
        except Exception:
            # A DUT that is up counts as a success even when log
            # gathering fails, so the failure is only logged.
            logging.exception('Non-critical failure in log '
                              'collection during %s.',
                              self.tag)
451
452
class ServoSysRqRepair(_ResetRepairAction):
    """
    Repair a Chrome device by sending a system request to the kernel.

    Sending 3 times the Alt+VolUp+x key combination (aka sysrq-x)
    will ask the kernel to panic itself and reboot while conserving
    the kernel logs in console ramoops.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host)
        # Press 3 times Alt+VolUp+X
        # no checking DUT health between each press as
        # killing Chrome is not really likely to fix the DUT SSH.
        for _ in range(3):
            try:
                host.servo.sysrq_x()
            # `except ... as` works on Python 2.6+ and Python 3; the
            # comma form used previously is Python 2 only.
            except error.TestFail as ex:
                raise hosts.AutoservRepairError(
                      'cannot press sysrq-x: %s.' % str(ex),
                      'cannot_press_sysrq_x')
            # less than 5 seconds between presses.
            time.sleep(2.0)
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via keyboard sysrq-x'
483
484
class ServoResetRepair(_ResetRepairAction):
    """Reset a Chrome device through its servo, then wait for boot."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host)
        power_state = host.servo.get_power_state_controller()
        power_state.reset()
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via servo'
498
499
class CrosRebootRepair(repair_utils.RebootRepair):
    """Clear dev mode settings on a CrOS target, then reboot it."""

    # Commands run, in order, before the reboot.
    _CLEAR_DEV_MODE_COMMANDS = (
        '/usr/share/vboot/bin/set_gbb_flags.sh 0',
        'crossystem disable_dev_request=1',
    )

    def repair(self, host):
        # pylint: disable=missing-docstring
        # N.B. The reboot must happen whether or not clearing dev_mode
        # works, so command failures are ignored.
        for command in self._CLEAR_DEV_MODE_COMMANDS:
            host.run(command, ignore_status=True)
        super(CrosRebootRepair, self).repair(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset GBB flags and Reboot the host'
517
518
class AutoUpdateRepair(hosts.RepairAction):
    """
    Repair by re-installing a test image through autoupdate.

    The DUT's designated "stable test image" is staged on a devserver
    and installed using the standard autoupdate procedure for a new
    test image.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        build = host.get_cros_repair_image_name()
        logging.info('Staging build for AU: %s', build)
        image_server = dev_server.ImageServer.resolve(build, host.hostname)
        image_server.trigger_download(build, synchronous=False)
        url_pattern = tools.image_url_pattern()
        update_url = url_pattern % (image_server.url(), build)
        afe_utils.machine_install_and_update_labels(host, update_url)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Re-install the stable build via AU'
541
542
class PowerWashRepair(AutoUpdateRepair):
    """
    Powerwash the DUT, then re-install through autoupdate.

    After the powerwash reboot, the stable test image is re-installed
    the same way as for `AutoUpdateRepair`.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        # Write the reset request file, then reboot with the longer
        # powerwash timeout.
        reset_request = ('echo "fast safe" > '
                         '/mnt/stateful_partition/factory_install_reset')
        host.run(reset_request)
        host.reboot(timeout=host.POWERWASH_BOOT_TIMEOUT, wait=True)
        super(PowerWashRepair, self).repair(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Powerwash and then re-install the stable build via AU'
562
563
class ServoInstallRepair(hosts.RepairAction):
    """
    Re-install a test image from USB through servo.

    The DUT's designated "stable test image" is staged for servo and
    installed from servo-attached USB storage.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host)
        staged_image = host.stage_image_for_servo()
        host.servo_install(staged_image)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reinstall from USB using servo'
581
582
class ColdRebootRepair(_ResetRepairAction):
    """
    Repair a Chrome device with a cold reboot that resets the EC.

    The reboot is issued with ectool, after which the DUT is expected
    to come back up.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        cold_reboot = 'ectool reboot_ec cold'
        host.reboot(reboot_cmd=cold_reboot)
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via cold reboot with ectool'
599
600
class JetstreamTpmRepair(hosts.RepairAction):
    """Repair by requesting a TPM owner clear and rebooting."""

    # Commands run, in order, before the reboot; failures are ignored.
    _PRE_REBOOT_COMMANDS = (
        'rm -f /var/cache/ap/setup-network',
        'rm -f /home/chronos/.oobe_completed',
        'rm -f /home/.shadow/.can_attempt_ownership',
        'crossystem clear_tpm_owner_request=1',
    )

    def repair(self, host):
        # pylint: disable=missing-docstring
        for command in self._PRE_REBOOT_COMMANDS:
            host.run(command, ignore_status=True)
        host.reboot()

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset TPM and reboot'
617
618
class JetstreamServiceRepair(hosts.RepairAction):
    """Repair Jetstream services by invoking the host's service cleanup."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        # All of the work is delegated to the host object.
        host.cleanup_services()

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Restart Jetstream services'
630
631
def _cros_verify_dag():
    """
    Build the verification DAG for a `CrosHost`.

    @returns A tuple of (verifier class, tag, dependency tags) entries.
        Every verifier other than 'ssh' depends only on 'ssh'.
    """
    needs_ssh = ('ssh',)
    return (
        (repair_utils.SshVerifier,              'ssh',      ()),
        (DevModeVerifier,                       'devmode',  needs_ssh),
        (HWIDVerifier,                          'hwid',     needs_ssh),
        (ACPowerVerifier,                       'power',    needs_ssh),
        (EXT4fsErrorVerifier,                   'ext4',     needs_ssh),
        (WritableVerifier,                      'writable', needs_ssh),
        (TPMStatusVerifier,                     'tpm',      needs_ssh),
        (UpdateSuccessVerifier,                 'good_au',  needs_ssh),
        (cros_firmware.FirmwareStatusVerifier,  'fwstatus', needs_ssh),
        (cros_firmware.FirmwareVersionVerifier, 'rwfw',     needs_ssh),
        (PythonVerifier,                        'python',   needs_ssh),
        (repair_utils.LegacyHostVerifier,       'cros',     needs_ssh),
        # N.B. The KVM check is tagged 'ec_reset'; that tag is the
        # trigger for the 'coldboot' repair action.
        (KvmExistsVerifier,                     'ec_reset', needs_ssh),
    )
652
653
def _cros_basic_repair_actions():
    """
    Build the basic repair actions for a `CrosHost`.

    @returns A tuple of (repair class, tag, dependency tags,
        trigger tags) entries, in the order they are listed here.
    """
    return (
        # RPM cycling must precede Servo reset:  if the DUT has a dead
        # battery, we need to reattach AC power before we reset via servo.
        (repair_utils.RPMCycleRepair, 'rpm', (), ('ssh', 'power',)),
        (ServoSysRqRepair, 'sysrq', (), ('ssh',)),
        (ServoResetRepair, 'servoreset', (), ('ssh',)),

        # N.B. FirmwareRepair can't fix a 'good_au' failure directly,
        # because it doesn't remove the flag file that triggers the
        # failure.  We include it as a repair trigger because it's
        # possible that the last update failed because of the firmware,
        # and we want the repair steps below to be able to trust the
        # firmware.
        (cros_firmware.FirmwareRepair, 'firmware', (),
                ('ssh', 'fwstatus', 'good_au',)),

        (CrosRebootRepair, 'reboot', ('ssh',), ('devmode', 'writable',)),

        (ColdRebootRepair, 'coldboot', ('ssh',), ('ec_reset',)),
    )
677
678
def _cros_extended_repair_actions(au_triggers=_CROS_AU_TRIGGERS,
                                  powerwash_triggers=_CROS_POWERWASH_TRIGGERS,
                                  usb_triggers=_CROS_USB_TRIGGERS):
    """
    Build the extended (re-install) repair actions for a `CrosHost`.

    @param au_triggers: verifier tags that trigger the 'au' action.
    @param powerwash_triggers: verifier tags that additionally trigger
        the 'powerwash' action.
    @param usb_triggers: verifier tags that additionally trigger the
        'usb' action.
    @returns A tuple of (repair class, tag, dependency tags,
        trigger tags) entries.
    """
    # The 'au', 'powerwash', and 'usb' actions stack up:  each one is
    # triggered by everything that triggers the one before it plus its
    # own triggers, and depends on the triggers it cannot fix itself.
    powerwash_and_au = powerwash_triggers + au_triggers
    au_action = (AutoUpdateRepair, 'au',
                 usb_triggers + powerwash_triggers, au_triggers)
    powerwash_action = (PowerWashRepair, 'powerwash',
                        usb_triggers, powerwash_and_au)
    usb_action = (ServoInstallRepair, 'usb',
                  (), usb_triggers + powerwash_and_au)
    return (au_action, powerwash_action, usb_action)
698
699
def _cros_repair_actions():
    """Build the full `CrosHost` repair list: basic, then extended."""
    basic_actions = _cros_basic_repair_actions()
    extended_actions = _cros_extended_repair_actions()
    return basic_actions + extended_actions
705
706
def create_cros_repair_strategy():
    """Build the `RepairStrategy`, tagged 'cros', for a `CrosHost`."""
    return hosts.RepairStrategy(_cros_verify_dag(),
                                _cros_repair_actions(),
                                'cros')
712
713
def _moblab_verify_dag():
    """
    Build the verification DAG for a `MoblabHost`.

    @returns A tuple of (verifier class, tag, dependency tags) entries.
    """
    needs_ssh = ('ssh',)
    return (
        (repair_utils.SshVerifier,              'ssh',     ()),
        (ACPowerVerifier,                       'power',   needs_ssh),
        (cros_firmware.FirmwareVersionVerifier, 'rwfw',    needs_ssh),
        (PythonVerifier,                        'python',  needs_ssh),
        (repair_utils.LegacyHostVerifier,       'cros',    needs_ssh),
    )
725
726
def _moblab_repair_actions():
    """
    Build the repair actions for a `MoblabHost`.

    @returns A tuple of (repair class, tag, dependency tags,
        trigger tags) entries.
    """
    rpm_action = (repair_utils.RPMCycleRepair, 'rpm', (), ('ssh', 'power',))
    au_action = (AutoUpdateRepair, 'au', ('ssh',), _CROS_AU_TRIGGERS)
    return (rpm_action, au_action)
734
735
def create_moblab_repair_strategy():
    """
    Build the `RepairStrategy`, tagged 'moblab', for a `MoblabHost`.

    Moblab uses a subset of the CrOS verify and repair steps.  Several
    pieces are dropped because they're not expected to be meaningful;
    others are dropped for more specific reasons:

    'tpm':  Moblab DUTs don't run the tests that matter to this
        verifier.  TODO(jrbarnette)  This assertion is unproven.

    'good_au':  This verifier can't pass, because the Moblab AU
        procedure doesn't properly delete the PROVISION_FAILED file.
        TODO(jrbarnette) We should refactor ChromiumOSUpdater so
        that it can be different for Moblab.

    'firmware':  Moblab DUTs shouldn't be in FAFT pools, so we don't
        try this.

    'powerwash':  Powerwash on Moblab causes trouble with deleting the
        DHCP leases file, so we skip it.
    """
    return hosts.RepairStrategy(_moblab_verify_dag(),
                                _moblab_repair_actions(),
                                'moblab')
761
762
def _jetstream_repair_actions():
    """
    Build the repair actions for a `JetstreamHost`.

    The list is the basic CrOS actions, then the two Jetstream-specific
    actions, then the extended CrOS actions with the Jetstream verifier
    tags added to the AU triggers.

    @returns A tuple of (repair class, tag, dependency tags,
        trigger tags) entries.
    """
    tpm_triggers = ('jetstream_tpm', 'jetstream_attestation')
    service_triggers = tpm_triggers + ('jetstream_services',)
    reinstall_deps = _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS

    jetstream_actions = (
        (JetstreamTpmRepair, 'jetstream_tpm_repair',
         reinstall_deps,
         _CROS_AU_TRIGGERS + tpm_triggers),

        (JetstreamServiceRepair, 'jetstream_service_repair',
         reinstall_deps + tpm_triggers,
         _CROS_AU_TRIGGERS + service_triggers),
    )
    extended_actions = _cros_extended_repair_actions(
        au_triggers=_CROS_AU_TRIGGERS + service_triggers)
    return _cros_basic_repair_actions() + jetstream_actions + extended_actions
784
785
def _jetstream_verify_dag():
    """
    Build the verification DAG for a `JetstreamHost`.

    @returns The `CrosHost` DAG followed by the three Jetstream
        verifiers, each of which depends only on 'ssh'.
    """
    needs_ssh = ('ssh',)
    jetstream_verifiers = (
        (JetstreamTpmVerifier, 'jetstream_tpm', needs_ssh),
        (JetstreamAttestationVerifier, 'jetstream_attestation', needs_ssh),
        (JetstreamServicesVerifier, 'jetstream_services', needs_ssh),
    )
    return _cros_verify_dag() + jetstream_verifiers
794
795
def create_jetstream_repair_strategy():
    """
    Build the `RepairStrategy`, tagged 'jetstream', for a `JetstreamHost`.

    The Jetstream strategy is based on the CrOS verify and repair, with
    the Jetstream TPM, attestation and services verifiers and their
    repair actions added.
    """
    return hosts.RepairStrategy(_jetstream_verify_dag(),
                                _jetstream_repair_actions(),
                                'jetstream')
806
807
808# TODO(pprabhu) Move this to a better place. I have no idea what that place
809# would be.
def _is_virtual_machine(host):
    """Determine whether the given |host| is a virtual machine.

    The host counts as a virtual machine when the "model name" lines of
    its /proc/cpuinfo mention 'qemu' (case-insensitive).

    @param host: a hosts.Host object.
    @returns True if the host is a virtual machine, False otherwise.
        The result is always a real bool; a bare `and` chain here used
        to leak an empty stdout value back to the caller.
    """
    result = host.run('cat /proc/cpuinfo | grep "model name"',
                      ignore_status=True)
    # A failed command or empty output means there is nothing to match.
    if result.exit_status != 0 or not result.stdout:
        return False
    return 'qemu' in result.stdout.lower()
820
821
class CryptohomeStatus(dict):
    """
    Dictionary of the cryptohome status reported by a host.

    The parsed status is stored as the dictionary content, and its
    'tpm' section is also exposed as the `tpm` attribute.
    """

    def __init__(self, host):
        super(CryptohomeStatus, self).__init__(_get_cryptohome_status(host))
        self.tpm = self['tpm']

    def _tpm_flag(self, name):
        """Return whether the TPM status field `name` equals True."""
        # A missing field yields None, which compares unequal to True.
        return self.tpm.get(name) == True

    @property
    def tpm_enabled(self):
        # pylint: disable=missing-docstring
        return self._tpm_flag('enabled')

    @property
    def tpm_owned(self):
        # pylint: disable=missing-docstring
        return self._tpm_flag('owned')

    @property
    def tpm_can_load_srk(self):
        # pylint: disable=missing-docstring
        return self._tpm_flag('can_load_srk')

    @property
    def tpm_can_load_srk_pubkey(self):
        # pylint: disable=missing-docstring
        return self._tpm_flag('can_load_srk_pubkey')
849
850
def _get_cryptohome_status(host):
    """Returns a dictionary containing the cryptohome status.

    @param host: a hosts.Host object.
    @returns A dictionary containing the cryptohome status.
    @raises AutoservVerifyError: if the output could not be parsed, is
       not a JSON object, or the TPM status is missing.
    @raises error.AutoservRunError: if the cryptohome command failed.
    """
    # This cryptohome command emits status information in JSON format. It
    # looks something like this:
    # {
    #    "installattrs": {
    #       ...
    #    },
    #    "mounts": [ {
    #       ...
    #    } ],
    #    "tpm": {
    #       "being_owned": false,
    #       "can_connect": true,
    #       "can_decrypt": false,
    #       "can_encrypt": false,
    #       "can_load_srk": true,
    #       "can_load_srk_pubkey": true,
    #       "enabled": true,
    #       "has_context": true,
    #       "has_cryptohome_key": false,
    #       "has_key_handle": false,
    #       "last_error": 0,
    #       "owned": true
    #    }
    # }
    try:
        output = host.run('cryptohome --action=status').stdout.strip()
        status = json.loads(output)
    except ValueError:
        raise hosts.AutoservVerifyError('Unable to parse cryptohome status')
    # Valid JSON need not be an object:  `null` or a number would make
    # the membership test raise TypeError, and a string containing
    # 'tpm' would be returned as if it were the status dictionary.
    if not isinstance(status, dict) or 'tpm' not in status:
        raise hosts.AutoservVerifyError('TPM status is missing')
    return status
892