1# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
12import httplib
13import logging
14import socket
15import traceback
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import control_data
20from autotest_lib.client.common_lib import error
21from autotest_lib.client.common_lib import global_config
22from autotest_lib.client.common_lib import host_states
23from autotest_lib.client.common_lib import hosts
24from autotest_lib.client.common_lib import lsbrelease_utils
25from autotest_lib.client.common_lib.cros import autoupdater
26from autotest_lib.client.common_lib.cros import dev_server
27from autotest_lib.client.common_lib.cros import retry
28from autotest_lib.client.common_lib.cros.graphite import autotest_es
29from autotest_lib.client.common_lib.cros.network import ping_runner
30from autotest_lib.client.cros import constants as client_constants
31from autotest_lib.server import afe_utils
32from autotest_lib.server import site_utils as server_site_utils
33from autotest_lib.server.cros import dnsname_mangler
34from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
35from autotest_lib.server.cros.dynamic_suite import control_file_getter
36from autotest_lib.server.cros.servo import servo
37from autotest_lib.server.hosts import servo_repair
38from autotest_lib.server.hosts import ssh_host
39from autotest_lib.site_utils.rpm_control_system import rpm_client
40
41try:
42    from chromite.lib import metrics
43except ImportError:
44    metrics = utils.metrics_mock
45
46
# Names of the host attributes in the database that describe the servo
# connected to a DUT: the servo host name, the servod port, the board
# the servo is attached to, and the servo's serial number.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'

_CONFIG = global_config.global_config
# When true, ServoHost.get_servod_server_proxy() connects to servod on a
# non-localhost servo host through the host's rpc_server_tracker rather
# than with a direct HTTP XMLRPC proxy.
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

# Directory handed to the control file getter when looking up the control
# file for the servo host reboot job.
AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

# Names of the jobs used to reboot a servo host that is shared by several
# DUTs; the second one is used when a forced reboot is requested.
_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'
64
class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    # Port used for servod when the caller does not supply `servo_port`.
    DEFAULT_PORT = 9999

    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30

    # Name of the servod method passed as the readiness test when an
    # XMLRPC connection is set up through the rpc_server_tracker.
    SERVO_READY_METHOD = 'get_version'

    # Shell command used by reboot(): start a normal reboot in the
    # background after one second, then issue a forced reboot ten
    # seconds later.
    REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'
77
78
79    def _initialize(self, servo_host='localhost',
80                    servo_port=DEFAULT_PORT, servo_board=None,
81                    servo_serial=None, is_in_lab=None, *args, **dargs):
82        """Initialize a ServoHost instance.
83
84        A ServoHost instance represents a host that controls a servo.
85
86        @param servo_host: Name of the host where the servod process
87                           is running.
88        @param servo_port: Port the servod process is listening on.
89        @param servo_board: Board that the servo is connected to.
90        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
91                          to None, for which utils.host_is_in_lab_zone will be
92                          called to check if the servo host is in Cros lab.
93
94        """
95        super(ServoHost, self)._initialize(hostname=servo_host,
96                                           *args, **dargs)
97        self.servo_port = servo_port
98        self.servo_board = servo_board
99        self.servo_serial = servo_serial
100        self._servo = None
101        self._repair_strategy = (
102                servo_repair.create_servo_repair_strategy())
103        self._is_localhost = (self.hostname == 'localhost')
104        if self._is_localhost:
105            self._is_in_lab = False
106        elif is_in_lab is None:
107            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
108        else:
109            self._is_in_lab = is_in_lab
110
111        # Commands on the servo host must be run by the superuser.
112        # Our account on a remote host is root, but if our target is
113        # localhost then we might be running unprivileged.  If so,
114        # `sudo` will have to be added to the commands.
115        if self._is_localhost:
116            self._sudo_required = utils.system_output('id -u') != '0'
117        else:
118            self._sudo_required = False
119
120
121    def connect_servo(self):
122        """Establish a connection to the servod server on this host.
123
124        Initializes `self._servo` and then verifies that all network
125        connections are working.  This will create an ssh tunnel if
126        it's required.
127
128        As a side effect of testing the connection, all signals on the
129        target servo are reset to default values, and the USB stick is
130        set to the neutral (off) position.
131        """
132        servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
133        timeout, _ = retry.timeout(
134                servo_obj.initialize_dut,
135                timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
136        if timeout:
137            raise hosts.AutoservVerifyError(
138                    'Servo initialize timed out.')
139        self._servo = servo_obj
140
141
142    def disconnect_servo(self):
143        """Disconnect our servo if it exists.
144
145        If we've previously successfully connected to our servo,
146        disconnect any established ssh tunnel, and set `self._servo`
147        back to `None`.
148        """
149        if self._servo:
150            # N.B. This call is safe even without a tunnel:
151            # rpc_server_tracker.disconnect() silently ignores
152            # unknown ports.
153            self.rpc_server_tracker.disconnect(self.servo_port)
154            self._servo = None
155
156
157    def is_in_lab(self):
158        """Check whether the servo host is a lab device.
159
160        @returns: True if the servo host is in Cros Lab, otherwise False.
161
162        """
163        return self._is_in_lab
164
165
166    def is_localhost(self):
167        """Checks whether the servo host points to localhost.
168
169        @returns: True if it points to localhost, otherwise False.
170
171        """
172        return self._is_localhost
173
174
175    def get_servod_server_proxy(self):
176        """Return a proxy that can be used to communicate with servod server.
177
178        @returns: An xmlrpclib.ServerProxy that is connected to the servod
179                  server on the host.
180        """
181        if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
182            return self.rpc_server_tracker.xmlrpc_connect(
183                    None, self.servo_port,
184                    ready_test_name=self.SERVO_READY_METHOD,
185                    timeout_seconds=60)
186        else:
187            remote = 'http://%s:%s' % (self.hostname, self.servo_port)
188            return xmlrpclib.ServerProxy(remote)
189
190
191    def is_cros_host(self):
192        """Check if a servo host is running chromeos.
193
194        @return: True if the servo host is running chromeos.
195            False if it isn't, or we don't have enough information.
196        """
197        try:
198            result = self.run('grep -q CHROMEOS /etc/lsb-release',
199                              ignore_status=True, timeout=10)
200        except (error.AutoservRunError, error.AutoservSSHTimeout):
201            return False
202        return result.exit_status == 0
203
204
205    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
206                         connect_timeout=None, alive_interval=None):
207        """Override default make_ssh_command to use tuned options.
208
209        Tuning changes:
210          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
211          connection failure. Consistency with remote_access.py.
212
213          - ServerAliveInterval=180; which causes SSH to ping connection every
214          180 seconds. In conjunction with ServerAliveCountMax ensures
215          that if the connection dies, Autotest will bail out quickly.
216
217          - ServerAliveCountMax=3; consistency with remote_access.py.
218
219          - ConnectAttempts=4; reduce flakiness in connection errors;
220          consistency with remote_access.py.
221
222          - UserKnownHostsFile=/dev/null; we don't care about the keys.
223
224          - SSH protocol forced to 2; needed for ServerAliveInterval.
225
226        @param user User name to use for the ssh connection.
227        @param port Port on the target host to use for ssh connection.
228        @param opts Additional options to the ssh command.
229        @param hosts_file Ignored.
230        @param connect_timeout Ignored.
231        @param alive_interval Ignored.
232
233        @returns: An ssh command with the requested settings.
234
235        """
236        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
237                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
238                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
239                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
240                        ' -o Protocol=2 -l %s -p %d')
241        return base_command % (opts, user, port)
242
243
244    def _make_scp_cmd(self, sources, dest):
245        """Format scp command.
246
247        Given a list of source paths and a destination path, produces the
248        appropriate scp command for encoding it. Remote paths must be
249        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
250        to allow additional ssh options.
251
252        @param sources: A list of source paths to copy from.
253        @param dest: Destination path to copy to.
254
255        @returns: An scp command that copies |sources| on local machine to
256                  |dest| on the remote servo host.
257
258        """
259        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
260                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
261        return command % (self.master_ssh_option,
262                          self.port, ' '.join(sources), dest)
263
264
265    def run(self, command, timeout=3600, ignore_status=False,
266            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
267            connect_timeout=30, ssh_failure_retry_ok=False,
268            options='', stdin=None, verbose=True, args=()):
269        """Run a command on the servo host.
270
271        Extends method `run` in SSHHost. If the servo host is a remote device,
272        it will call `run` in SSHost without changing anything.
273        If the servo host is 'localhost', it will call utils.system_output.
274
275        @param command: The command line string.
276        @param timeout: Time limit in seconds before attempting to
277                        kill the running process. The run() function
278                        will take a few seconds longer than 'timeout'
279                        to complete if it has to kill the process.
280        @param ignore_status: Do not raise an exception, no matter
281                              what the exit code of the command is.
282        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
283        @param connect_timeout: SSH connection timeout (in seconds)
284                                Ignored if host is 'localhost'.
285        @param options: String with additional ssh command options
286                        Ignored if host is 'localhost'.
287        @param ssh_failure_retry_ok: when True and ssh connection failure is
288                                     suspected, OK to retry command (but not
289                                     compulsory, and likely not needed here)
290        @param stdin: Stdin to pass (a string) to the executed command.
291        @param verbose: Log the commands.
292        @param args: Sequence of strings to pass as arguments to command by
293                     quoting them in " and escaping their contents if necessary.
294
295        @returns: A utils.CmdResult object.
296
297        @raises AutoservRunError if the command failed.
298        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
299                when servo host is not 'localhost'.
300
301        """
302        run_args = {'command': command, 'timeout': timeout,
303                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
304                    'stderr_tee': stderr_tee, 'stdin': stdin,
305                    'verbose': verbose, 'args': args}
306        if self.is_localhost():
307            if self._sudo_required:
308                run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
309                        command)
310            try:
311                return utils.run(**run_args)
312            except error.CmdError as e:
313                logging.error(e)
314                raise error.AutoservRunError('command execution error',
315                                             e.result_obj)
316        else:
317            run_args['connect_timeout'] = connect_timeout
318            run_args['options'] = options
319            return super(ServoHost, self).run(**run_args)
320
321
322    def _get_release_version(self):
323        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
324
325        @returns The version string in lsb-release, under attribute
326                 CHROMEOS_RELEASE_VERSION.
327        """
328        lsb_release_content = self.run(
329                    'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
330        return lsbrelease_utils.get_chromeos_release_version(
331                    lsb_release_content=lsb_release_content)
332
333
334    def get_attached_duts(self, afe):
335        """Gather a list of duts that use this servo host.
336
337        @param afe: afe instance.
338
339        @returns list of duts.
340        """
341        return afe.get_hosts_by_attribute(
342                attribute=SERVO_HOST_ATTR, value=self.hostname)
343
344
345    def get_board(self):
346        """Determine the board for this servo host.
347
348        @returns a string representing this servo host's board.
349        """
350        return lsbrelease_utils.get_current_board(
351                lsb_release_content=self.run('cat /etc/lsb-release').stdout)
352
353
354    def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
355        """Choose which dut to schedule servo host reboot job.
356
357        We'll want a semi-deterministic way of selecting which host should be
358        scheduled for the servo host reboot job.  For now we'll sort the
359        list with the expectation the dut list will stay consistent.
360        From there we'll grab the first dut that is available so we
361        don't schedule a job on a dut that will never run.
362
363        @param dut_list:  List of the dut hostnames to choose from.
364        @param afe:       Instance of the AFE.
365
366        @return hostname of dut to schedule job on.
367        """
368        afe_hosts = afe.get_hosts(dut_list)
369        afe_hosts.sort()
370        for afe_host in afe_hosts:
371            if afe_host.status not in host_states.UNAVAILABLE_STATES:
372                return afe_host.hostname
373        # If they're all unavailable, just return the first sorted dut.
374        dut_list.sort()
375        return dut_list[0]
376
377
378    def _sync_job_scheduled_for_duts(self, dut_list, afe):
379        """Checks if a synchronized reboot has been scheduled for these duts.
380
381        Grab all the host queue entries that aren't completed for the duts and
382        see if any of them have the expected job name.
383
384        @param dut_list:  List of duts to check on.
385        @param afe:       Instance of the AFE.
386
387        @returns True if the job is scheduled, False otherwise.
388        """
389        afe_hosts = afe.get_hosts(dut_list)
390        for afe_host in afe_hosts:
391            hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
392            for hqe in hqes:
393                job = afe.get_jobs(id=hqe.job.id)
394                if job and job[0].name in (_SERVO_HOST_REBOOT_TEST_NAME,
395                                           _SERVO_HOST_FORCE_REBOOT_TEST_NAME):
396                    return True
397        return False
398
399
400    def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
401        """Schedule a job to reboot the servo host.
402
403        When we schedule a job, it will create a ServoHost object which will
404        go through this entire flow of checking if a reboot is needed and
405        trying to schedule it.  There is probably a better approach to setting
406        up a synchronized reboot but I'm coming up short on better ideas so I
407        apologize for this circus show.
408
409        @param dut_list:      List of duts that need to be locked.
410        @param afe:           Instance of afe.
411        @param force_reboot:  Boolean to indicate if a forced reboot should be
412                              scheduled or not.
413        """
414        # If we've already scheduled job on a dut, we're done here.
415        if self._sync_job_scheduled_for_duts(dut_list, afe):
416            return
417
418        # Looks like we haven't scheduled a job yet.
419        test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
420                else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
421        dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
422        getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
423        control_file = getter.get_control_file_contents_by_name(test)
424        control_type = control_data.CONTROL_TYPE_NAMES.SERVER
425        try:
426            afe.create_job(control_file=control_file, name=test,
427                           control_type=control_type, hosts=[dut])
428        except Exception as e:
429            # Sometimes creating the job will raise an exception. We'll log it
430            # but we don't want to fail because of it.
431            logging.exception('Scheduling reboot job failed: %s', e)
432            metadata = {'dut': dut,
433                        'servo_host': self.hostname,
434                        'error': str(e),
435                        'details': traceback.format_exc()}
436            # We want to track how often we fail here so we can justify
437            # investing some effort into hardening up afe.create_job().
438            autotest_es.post(use_http=True,
439                             type_str='servohost_Reboot_schedule_fail',
440                             metadata=metadata)
441
442
443    def reboot(self, *args, **dargs):
444        """Reboot using special servo host reboot command."""
445        super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
446                                      *args, **dargs)
447
448
449    def _check_for_reboot(self, updater):
450        """Reboot this servo host if an upgrade is waiting.
451
452        If the host has successfully downloaded and finalized a new
453        build, reboot.
454
455        @param updater: a ChromiumOSUpdater instance for checking
456            whether reboot is needed.
457        @return Return a (status, build) tuple reflecting the
458            update_engine status and current build of the host
459            at the end of the call.
460        """
461        current_build_number = self._get_release_version()
462        status = updater.check_update_status()
463        if status == autoupdater.UPDATER_NEED_REBOOT:
464            # Check if we need to schedule an organized reboot.
465            afe = frontend_wrappers.RetryingAFE(
466                    timeout_min=5, delay_sec=10,
467                    server=server_site_utils.get_global_afe_hostname())
468            dut_list = self.get_attached_duts(afe)
469            logging.info('servo host has the following duts: %s', dut_list)
470            if len(dut_list) > 1:
471                logging.info('servo host has multiple duts, scheduling '
472                             'synchronized reboot')
473                self.schedule_synchronized_reboot(dut_list, afe)
474                return status, current_build_number
475
476            logging.info('Rebooting servo host %s from build %s',
477                         self.hostname, current_build_number)
478            # Tell the reboot() call not to wait for completion.
479            # Otherwise, the call will log reboot failure if servo does
480            # not come back.  The logged reboot failure will lead to
481            # test job failure.  If the test does not require servo, we
482            # don't want servo failure to fail the test with error:
483            # `Host did not return from reboot` in status.log.
484            self.reboot(fastsync=True, wait=False)
485
486            # We told the reboot() call not to wait, but we need to wait
487            # for the reboot before we continue.  Alas.  The code from
488            # here below is basically a copy of Host.wait_for_restart(),
489            # with the logging bits ripped out, so that they can't cause
490            # the failure logging problem described above.
491            #
492            # The black stain that this has left on my soul can never be
493            # erased.
494            old_boot_id = self.get_boot_id()
495            if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
496                                  warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
497                                  old_boot_id=old_boot_id):
498                raise error.AutoservHostError(
499                        'servo host %s failed to shut down.' %
500                        self.hostname)
501            if self.wait_up(timeout=120):
502                current_build_number = self._get_release_version()
503                status = updater.check_update_status()
504                logging.info('servo host %s back from reboot, with build %s',
505                             self.hostname, current_build_number)
506            else:
507                raise error.AutoservHostError(
508                        'servo host %s failed to come back from reboot.' %
509                        self.hostname)
510        return status, current_build_number
511
512
513    def update_image(self, wait_for_update=False):
514        """Update the image on the servo host, if needed.
515
516        This method recognizes the following cases:
517          * If the Host is not running Chrome OS, do nothing.
518          * If a previously triggered update is now complete, reboot
519            to the new version.
520          * If the host is processing a previously triggered update,
521            do nothing.
522          * If the host is running a version of Chrome OS different
523            from the default for servo Hosts, trigger an update, but
524            don't wait for it to complete.
525
526        @param wait_for_update If an update needs to be applied and
527            this is true, then don't return until the update is
528            downloaded and finalized, and the host rebooted.
529        @raises dev_server.DevServerException: If all the devservers are down.
530        @raises site_utils.ParseBuildNameException: If the devserver returns
531            an invalid build name.
532        @raises autoupdater.ChromiumOSError: If something goes wrong in the
533            checking update engine client status or applying an update.
534        @raises AutoservRunError: If the update_engine_client isn't present on
535            the host, and the host is a cros_host.
536
537        """
538        # servod could be running in a Ubuntu workstation.
539        if not self.is_cros_host():
540            logging.info('Not attempting an update, either %s is not running '
541                         'chromeos or we cannot find enough information about '
542                         'the host.', self.hostname)
543            return
544
545        if lsbrelease_utils.is_moblab():
546            logging.info('Not attempting an update, %s is running moblab.',
547                         self.hostname)
548            return
549
550        target_build = afe_utils.get_stable_cros_image_name(self.get_board())
551        target_build_number = server_site_utils.ParseBuildName(
552                target_build)[3]
553        # For servo image staging, we want it as more widely distributed as
554        # possible, so that devservers' load can be evenly distributed. So use
555        # hostname instead of target_build as hash.
556        ds = dev_server.ImageServer.resolve(self.hostname,
557                                            hostname=self.hostname)
558        url = ds.get_update_url(target_build)
559
560        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
561        status, current_build_number = self._check_for_reboot(updater)
562        update_pending = True
563        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
564            logging.info('servo host %s already processing an update, update '
565                         'engine client status=%s', self.hostname, status)
566        elif current_build_number != target_build_number:
567            logging.info('Using devserver url: %s to trigger update on '
568                         'servo host %s, from %s to %s', url, self.hostname,
569                         current_build_number, target_build_number)
570            try:
571                ds.stage_artifacts(target_build,
572                                   artifacts=['full_payload'])
573            except Exception as e:
574                logging.error('Staging artifacts failed: %s', str(e))
575                logging.error('Abandoning update for this cycle.')
576            else:
577                try:
578                    # TODO(jrbarnette): This 'touch' is a gross hack
579                    # to get us past crbug.com/613603.  Once that
580                    # bug is resolved, we should remove this code.
581                    self.run('touch /home/chronos/.oobe_completed')
582                    updater.trigger_update()
583                except autoupdater.RootFSUpdateError as e:
584                    trigger_download_status = 'failed with %s' % str(e)
585                    metrics.Counter('chromeos/autotest/servo/'
586                                    'rootfs_update_failed').increment()
587                else:
588                    trigger_download_status = 'passed'
589                logging.info('Triggered download and update %s for %s, '
590                             'update engine currently in status %s',
591                             trigger_download_status, self.hostname,
592                             updater.check_update_status())
593        else:
594            logging.info('servo host %s does not require an update.',
595                         self.hostname)
596            update_pending = False
597
598        if update_pending and wait_for_update:
599            logging.info('Waiting for servo update to complete.')
600            self.run('update_engine_client --follow', ignore_status=True)
601
602
603    def verify(self, silent=False):
604        """Update the servo host and verify it's in a good state.
605
606        @param silent   If true, suppress logging in `status.log`.
607        """
608        # TODO(jrbarnette) Old versions of beaglebone_servo include
609        # the powerd package.  If you touch the .oobe_completed file
610        # (as we do to work around an update_engine problem), then
611        # powerd will eventually shut down the beaglebone for lack
612        # of (apparent) activity.  Current versions of
613        # beaglebone_servo don't have powerd, but until we can purge
614        # the lab of the old images, we need to make sure powerd
615        # isn't running.
616        self.run('stop powerd', ignore_status=True)
617        try:
618            self._repair_strategy.verify(self, silent)
619        except:
620            self.disconnect_servo()
621            raise
622
623
624    def repair(self, silent=False):
625        """Attempt to repair servo host.
626
627        @param silent   If true, suppress logging in `status.log`.
628        """
629        try:
630            self._repair_strategy.repair(self, silent)
631        except:
632            self.disconnect_servo()
633            raise
634
635
636    def has_power(self):
637        """Return whether or not the servo host is powered by PoE."""
638        # TODO(fdeng): See crbug.com/302791
639        # For now, assume all servo hosts in the lab have power.
640        return self.is_in_lab()
641
642
643    def power_cycle(self):
644        """Cycle power to this host via PoE if it is a lab device.
645
646        @raises AutoservRepairError if it fails to power cycle the
647                servo host.
648
649        """
650        if self.has_power():
651            try:
652                rpm_client.set_power(self.hostname, 'CYCLE')
653            except (socket.error, xmlrpclib.Error,
654                    httplib.BadStatusLine,
655                    rpm_client.RemotePowerException) as e:
656                raise hosts.AutoservRepairError(
657                        'Power cycling %s failed: %s' % (self.hostname, e))
658        else:
659            logging.info('Skipping power cycling, not a lab device.')
660
661
662    def get_servo(self):
663        """Get the cached servo.Servo object.
664
665        @return: a servo.Servo object.
666        """
667        return self._servo
668
669
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that belongs to a DUT hostname.

    '-servo' is appended to the first dot-separated component of the
    name; everything after the first dot is kept as is.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    short_name, dot, domain = dut_hostname.partition('.')
    return ''.join((short_name, '-servo', dot, domain))
681
682
def servo_host_is_up(servo_hostname):
    """Ping a servo host to find out whether it is up.

    Three pings are sent; the host counts as up when at least one reply
    is received.

    @param servo_hostname: hostname of the servo host.

    @return True if it's up, False otherwise
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple of
    # seconds when it fails, rather than the 60 seconds it takes to
    # decide that an SSH ping has timed out.  Specifically, that timeout
    # happens when our servo DNS name resolves, but there is no host at
    # that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(
            servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    runner = ping_runner.PingRunner()
    ping_result = runner.ping(config)
    return ping_result.received > 0
700
701
def _map_afe_board_to_servo_board(afe_board):
    """Translate an AFE board name into the board name servo expects.

    For servo's purposes many boards are identical to other boards.
    A board listed in the explicit table is replaced by its mapped
    name; otherwise the first matching known suffix (if any) is
    stripped from the end of the name.

    @param afe_board string board name received from AFE.
    @return board we expect servo to have.

    """
    explicit_boards = {'gizmo': 'panther'}
    known_suffixes = ('-freon', '_freon', '_moblab', '-cheets')
    if afe_board in explicit_boards:
        servo_board = explicit_boards[afe_board]
    else:
        # Only the first suffix that matches (in table order) is removed.
        matching = [s for s in known_suffixes if afe_board.endswith(s)]
        if matching:
            servo_board = afe_board[:-len(matching[0])]
        else:
            servo_board = afe_board
    if servo_board != afe_board:
        logging.info('Mapping AFE board=%s to %s', afe_board, servo_board)
    return servo_board
725
726
def _get_standard_servo_args(dut_host):
    """Return servo data associated with a given DUT.

    This checks for the presence of servo host and port attached to the
    given `dut_host`.  This data should be stored in the
    `_afe_host.attributes` field in the provided `dut_host` parameter.

    If the servo host attribute is present, the returned args carry the
    host plus the optional port and serial attributes.  If the port
    attribute is present but is not a valid integer, the error is logged
    and `None` is returned in place of the args.  When no servo host
    attribute is present, a servo hostname derived from the DUT hostname
    is used, but only outside of moblab, only when the DUT hostname is
    not an IP address, and only if the derived name is in the lab zone.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of `servo_args` dict (or `None` if no usable servo
            data was found) with host and an optional port, serial and
            board, plus an `is_in_lab` flag indicating whether this is
            in the CrOS test lab, or some different environment.
    """
    servo_args = None
    is_in_lab = False
    is_ssp_moblab = False
    if utils.is_in_container():
        # Inside a container, the moblab setting comes from the SSP
        # config section rather than from probing the environment.
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
    attrs = dut_host._afe_host.attributes
    if attrs and SERVO_HOST_ATTR in attrs:
        servo_host = attrs[SERVO_HOST_ATTR]
        if (is_ssp_moblab and servo_host in ['localhost', '127.0.0.1']):
            # A loopback address is replaced with the configured
            # 'host_container_ip' value when running in an SSP moblab
            # container.
            servo_host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_host}
        if SERVO_PORT_ATTR in attrs:
            servo_port = attrs[SERVO_PORT_ATTR]
            try:
                servo_args[SERVO_PORT_ATTR] = int(servo_port)
            except ValueError:
                logging.error('servo port is not an int: %s', servo_port)
                # Let's set the servo args to None since we're not creating
                # the ServoHost object with the proper port now.
                servo_args = None
        # servo_args may have been reset to None by a bad port value;
        # subscripting it in that case would raise TypeError.
        if servo_args is not None and SERVO_SERIAL_ATTR in attrs:
            servo_args[SERVO_SERIAL_ATTR] = attrs[SERVO_SERIAL_ATTR]
        is_in_lab = (not is_moblab
                     and utils.host_is_in_lab_zone(servo_host))

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    elif (not is_moblab and
            not dnsname_mangler.is_ip_address(dut_host.hostname)):
        servo_host = make_servo_hostname(dut_host.hostname)
        is_in_lab = utils.host_is_in_lab_zone(servo_host)
        if is_in_lab:
            servo_args = {SERVO_HOST_ATTR: servo_host}
    if servo_args is not None:
        # Attach the servo-appropriate board name when the host info
        # store knows the DUT's board.
        info = dut_host.host_info_store.get()
        if info.board:
            servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                    info.board)
    return servo_args, is_in_lab
785
786
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Create a ServoHost object for a given DUT, if appropriate.

    Builds a `ServoHost` for the servo attached to `dut` and checks it
    with either `verify()` or `repair()`, depending on the arguments:
      * A non-`None` `servo_args` means the caller depends on servo:
        a servo host must be created and is checked with `repair()`.
      * Otherwise, a servo host is considered only if `try_lab_servo`
        is true and servo parameters can be found for the DUT:
          * With `try_servo_repair` true, the servo host is created
            and checked with `repair()`.
          * With `try_servo_repair` false, the servo host is created
            only if it answers `ping`, and is checked with `verify()`.

    When `servo_args` was supplied, failures from `repair()` propagate
    to the caller; in every other case the exception is logged and
    dropped, and the servo host is still returned.  The servo
    dependency case only arises when we're called from a test (not
    special task) control file with an explicit servo dependency; there
    repair is run with `silent=True` so that it does not write to
    `status.log` and pollute test results.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    Servo host parameters (host name, port number, DUT board) come from
    these sources, highest priority first:
      * Servo attributes recorded for `dut`.
      * A servo DNS entry in the CrOS lab network derived from the DUT
        hostname, used with the default port and the DUT's board.
      * The `servo_args` dict passed in by the caller, if nothing else
        was found.

    @param dut            An instance of `Host` from which to take
                          servo parameters (if available).
    @param servo_args     A dictionary with servo parameters to use if
                          they can't be found from `dut`.  If this
                          argument is supplied, unrepaired exceptions
                          from `repair()` will be passed back to the
                          caller.
    @param try_lab_servo  If not true, servo host creation will be
                          skipped unless otherwise required by the
                          caller.
    @param try_servo_repair  If true, check a servo host with
                          `repair()` instead of `verify()`.

    @returns: A ServoHost object or None. See comments above.

    """
    servo_required = servo_args is not None
    in_lab = False
    if dut is not None and (try_lab_servo or servo_required):
        dut_servo_args, in_lab = _get_standard_servo_args(dut)
        # Parameters found via the DUT win over the caller's.
        if dut_servo_args is not None:
            servo_args = dut_servo_args
    if servo_args is None:
        return None
    if not (servo_required or try_servo_repair):
        # Only a plain verify was requested; don't bother creating a
        # host for a servo that doesn't even respond to ping.
        if not servo_host_is_up(servo_args[SERVO_HOST_ATTR]):
            return None
    servo_host = ServoHost(is_in_lab=in_lab, **servo_args)
    # repair() covers everything verify() does, so exactly one of the
    # two is called below; never both.
    if servo_required:
        servo_host.repair(silent=True)
        return servo_host
    if try_servo_repair:
        method_name, operation = 'repair', 'repair'
    else:
        method_name, operation = 'verify', 'verification'
    try:
        getattr(servo_host, method_name)()
    except Exception:
        logging.exception('Servo %s failed for %s',
                          operation, servo_host.hostname)
    return servo_host
870