# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.


"""This file provides core logic for servo verify/repair process."""


import httplib
import logging
import socket
import traceback
import xmlrpclib

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import control_data
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_states
from autotest_lib.client.common_lib import hosts
from autotest_lib.client.common_lib import lsbrelease_utils
from autotest_lib.client.common_lib.cros import autoupdater
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.client.common_lib.cros.graphite import autotest_es
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.client.cros import constants as client_constants
from autotest_lib.server import afe_utils
from autotest_lib.server import site_utils as server_site_utils
from autotest_lib.server.cros import dnsname_mangler
from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
from autotest_lib.server.cros.dynamic_suite import control_file_getter
from autotest_lib.server.cros.servo import servo
from autotest_lib.server.hosts import servo_repair
from autotest_lib.server.hosts import ssh_host
from autotest_lib.site_utils.rpm_control_system import rpm_client

try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


# Names of the host attributes in the database that represent the values for
# the servo_host and servo_port for a servo connected to the DUT.
SERVO_HOST_ATTR = 'servo_host'
SERVO_PORT_ATTR = 'servo_port'
SERVO_BOARD_ATTR = 'servo_board'
SERVO_SERIAL_ATTR = 'servo_serial'

_CONFIG = global_config.global_config
ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
        'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)

AUTOTEST_BASE = _CONFIG.get_config_value(
        'SCHEDULER', 'drone_installation_directory',
        default='/usr/local/autotest')

_SERVO_HOST_REBOOT_TEST_NAME = 'servohost_Reboot'
_SERVO_HOST_FORCE_REBOOT_TEST_NAME = 'servohost_Reboot.force_reboot'


class ServoHost(ssh_host.SSHHost):
    """Host class for a host that controls a servo, e.g. beaglebone."""

    DEFAULT_PORT = 9999

    # Timeout for initializing servo signals.
    INITIALIZE_SERVO_TIMEOUT_SECS = 30

    # Ready test function
    SERVO_READY_METHOD = 'get_version'

    REBOOT_CMD = 'sleep 1; reboot & sleep 10; reboot -f'


    def _initialize(self, servo_host='localhost',
                    servo_port=DEFAULT_PORT, servo_board=None,
                    servo_serial=None, is_in_lab=None, *args, **dargs):
        """Initialize a ServoHost instance.

        A ServoHost instance represents a host that controls a servo.

        @param servo_host: Name of the host where the servod process
                           is running.
        @param servo_port: Port the servod process is listening on.
        @param servo_board: Board that the servo is connected to.
        @param servo_serial: Value stored as `self.servo_serial` and handed
                             to `servo.Servo` in `connect_servo()`
                             (presumably the servo's serial number).
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.

        """
        super(ServoHost, self)._initialize(hostname=servo_host,
                                           *args, **dargs)
        self.servo_port = servo_port
        self.servo_board = servo_board
        self.servo_serial = servo_serial
        self._servo = None
        self._repair_strategy = (
                servo_repair.create_servo_repair_strategy())
        self._is_localhost = (self.hostname == 'localhost')
        # localhost is never treated as a lab device, regardless of the
        # `is_in_lab` argument.
        if self._is_localhost:
            self._is_in_lab = False
        elif is_in_lab is None:
            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
        else:
            self._is_in_lab = is_in_lab

        # Commands on the servo host must be run by the superuser.
        # Our account on a remote host is root, but if our target is
        # localhost then we might be running unprivileged.  If so,
        # `sudo` will have to be added to the commands.
        if self._is_localhost:
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            self._sudo_required = False


    def connect_servo(self):
        """Establish a connection to the servod server on this host.

        Initializes `self._servo` and then verifies that all network
        connections are working.  This will create an ssh tunnel if
        it's required.

        As a side effect of testing the connection, all signals on the
        target servo are reset to default values, and the USB stick is
        set to the neutral (off) position.

        @raises AutoservVerifyError if servo initialization does not
                finish within INITIALIZE_SERVO_TIMEOUT_SECS.
        """
        servo_obj = servo.Servo(servo_host=self,
                                servo_serial=self.servo_serial)
        timeout, _ = retry.timeout(
                servo_obj.initialize_dut,
                timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
        if timeout:
            raise hosts.AutoservVerifyError(
                    'Servo initialize timed out.')
        # Only cache the servo object once it initialized successfully.
        self._servo = servo_obj


    def disconnect_servo(self):
        """Disconnect our servo if it exists.

        If we've previously successfully connected to our servo,
        disconnect any established ssh tunnel, and set `self._servo`
        back to `None`.
        """
        if self._servo:
            # N.B. This call is safe even without a tunnel:
            # rpc_server_tracker.disconnect() silently ignores
            # unknown ports.
            self.rpc_server_tracker.disconnect(self.servo_port)
            self._servo = None


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def get_servod_server_proxy(self):
        """Return a proxy that can be used to communicate with servod server.

        When ENABLE_SSH_TUNNEL_FOR_SERVO is set and the host is not
        localhost, the proxy is obtained from `rpc_server_tracker`;
        otherwise a direct `xmlrpclib.ServerProxy` to
        `http://<hostname>:<servo_port>` is created.

        @returns: An xmlrpclib.ServerProxy that is connected to the servod
                  server on the host.
        """
        if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
            return self.rpc_server_tracker.xmlrpc_connect(
                    None, self.servo_port,
                    ready_test_name=self.SERVO_READY_METHOD,
                    timeout_seconds=60)
        else:
            remote = 'http://%s:%s' % (self.hostname, self.servo_port)
            return xmlrpclib.ServerProxy(remote)


    def is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
            False if it isn't, or we don't have enough information.
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
                         connect_timeout=None, alive_interval=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.

        @returns: An ssh command with the requested settings.

        """
        base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                        ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                        ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                        ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                        ' -o Protocol=2 -l %s -p %d')
        return base_command % (opts, user, port)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given a list of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: A list of source paths to copy from.
        @param dest: Destination path to copy to.

        @returns: An scp command that copies |sources| on local machine to
                  |dest| on the remote servo host.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self.master_ssh_option,
                          self.port, ' '.join(sources), dest)


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, ssh_failure_retry_ok=False,
            options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, wrapping
        the command in `sudo -n sh -c` when superuser rights are needed.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param ssh_failure_retry_ok: when True and ssh connection failure is
                                     suspected, OK to retry command (but not
                                     compulsory, and likely not needed here).
                                     Not forwarded by this implementation.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
                        command)
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local failure into the exception type that
                # callers of the remote code path expect.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(ServoHost, self).run(**run_args)


    def _get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        lsb_release_content = self.run(
                'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
        return lsbrelease_utils.get_chromeos_release_version(
                lsb_release_content=lsb_release_content)


    def get_attached_duts(self, afe):
        """Gather a list of duts that use this servo host.

        @param afe: afe instance.

        @returns list of duts.
        """
        return afe.get_hosts_by_attribute(
                attribute=SERVO_HOST_ATTR, value=self.hostname)


    def get_board(self):
        """Determine the board for this servo host.

        @returns a string representing this servo host's board.
        """
        return lsbrelease_utils.get_current_board(
                lsb_release_content=self.run('cat /etc/lsb-release').stdout)


    def _choose_dut_for_synchronized_reboot(self, dut_list, afe):
        """Choose which dut to schedule servo host reboot job.

        We'll want a semi-deterministic way of selecting which host should be
        scheduled for the servo host reboot job.  For now we'll sort the
        list with the expectation the dut list will stay consistent.
        From there we'll grab the first dut that is available so we
        don't schedule a job on a dut that will never run.

        The caller's `dut_list` is left unmodified.

        @param dut_list:  List of the dut hostnames to choose from.
        @param afe:       Instance of the AFE.

        @return hostname of dut to schedule job on.
        """
        afe_hosts = afe.get_hosts(dut_list)
        afe_hosts.sort()
        for afe_host in afe_hosts:
            if afe_host.status not in host_states.UNAVAILABLE_STATES:
                return afe_host.hostname
        # If they're all unavailable, just return the first sorted dut.
        # Use sorted() rather than list.sort() so the caller's list is not
        # reordered as a side effect.
        return sorted(dut_list)[0]


    def _sync_job_scheduled_for_duts(self, dut_list, afe):
        """Checks if a synchronized reboot has been scheduled for these duts.

        Grab all the host queue entries that aren't completed for the duts and
        see if any of them have the expected job name.

        @param dut_list:  List of duts to check on.
        @param afe:       Instance of the AFE.

        @returns True if the job is scheduled, False otherwise.
        """
        afe_hosts = afe.get_hosts(dut_list)
        for afe_host in afe_hosts:
            hqes = afe.get_host_queue_entries(host=afe_host.id, complete=0)
            for hqe in hqes:
                job = afe.get_jobs(id=hqe.job.id)
                if job and job[0].name in (_SERVO_HOST_REBOOT_TEST_NAME,
                                           _SERVO_HOST_FORCE_REBOOT_TEST_NAME):
                    return True
        return False


    def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
        """Schedule a job to reboot the servo host.

        When we schedule a job, it will create a ServoHost object which will
        go through this entire flow of checking if a reboot is needed and
        trying to schedule it.  There is probably a better approach to setting
        up a synchronized reboot but I'm coming up short on better ideas so I
        apologize for this circus show.

        Failures to create the job are logged and reported, never raised.

        @param dut_list:      List of duts that need to be locked.
        @param afe:           Instance of afe.
        @param force_reboot:  Boolean to indicate if a forced reboot should be
                              scheduled or not.
        """
        # If we've already scheduled job on a dut, we're done here.
        if self._sync_job_scheduled_for_duts(dut_list, afe):
            return

        # Looks like we haven't scheduled a job yet.
        test = (_SERVO_HOST_REBOOT_TEST_NAME if not force_reboot
                else _SERVO_HOST_FORCE_REBOOT_TEST_NAME)
        dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
        getter = control_file_getter.FileSystemGetter([AUTOTEST_BASE])
        control_file = getter.get_control_file_contents_by_name(test)
        control_type = control_data.CONTROL_TYPE_NAMES.SERVER
        try:
            afe.create_job(control_file=control_file, name=test,
                           control_type=control_type, hosts=[dut])
        except Exception as e:
            # Sometimes creating the job will raise an exception. We'll log it
            # but we don't want to fail because of it.
            logging.exception('Scheduling reboot job failed: %s', e)
            metadata = {'dut': dut,
                        'servo_host': self.hostname,
                        'error': str(e),
                        'details': traceback.format_exc()}
            # We want to track how often we fail here so we can justify
            # investing some effort into hardening up afe.create_job().
            autotest_es.post(use_http=True,
                             type_str='servohost_Reboot_schedule_fail',
                             metadata=metadata)


    def reboot(self, *args, **dargs):
        """Reboot using special servo host reboot command."""
        super(ServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
                                      *args, **dargs)


    def _check_for_reboot(self, updater):
        """Reboot this servo host if an upgrade is waiting.

        If the host has successfully downloaded and finalized a new
        build, reboot.  When more than one DUT is attached to this servo
        host, a synchronized reboot job is scheduled instead and no
        reboot happens in this call.

        @param updater: a ChromiumOSUpdater instance for checking
            whether reboot is needed.
        @return Return a (status, build) tuple reflecting the
            update_engine status and current build of the host
            at the end of the call.
        @raises AutoservHostError if the host fails to shut down or
            fails to come back from the reboot.
        """
        current_build_number = self._get_release_version()
        status = updater.check_update_status()
        if status == autoupdater.UPDATER_NEED_REBOOT:
            # Check if we need to schedule an organized reboot.
            afe = frontend_wrappers.RetryingAFE(
                    timeout_min=5, delay_sec=10,
                    server=server_site_utils.get_global_afe_hostname())
            dut_list = self.get_attached_duts(afe)
            logging.info('servo host has the following duts: %s', dut_list)
            if len(dut_list) > 1:
                logging.info('servo host has multiple duts, scheduling '
                             'synchronized reboot')
                self.schedule_synchronized_reboot(dut_list, afe)
                return status, current_build_number

            logging.info('Rebooting servo host %s from build %s',
                         self.hostname, current_build_number)
            # Tell the reboot() call not to wait for completion.
            # Otherwise, the call will log reboot failure if servo does
            # not come back.  The logged reboot failure will lead to
            # test job failure.  If the test does not require servo, we
            # don't want servo failure to fail the test with error:
            # `Host did not return from reboot` in status.log.
            self.reboot(fastsync=True, wait=False)

            # We told the reboot() call not to wait, but we need to wait
            # for the reboot before we continue.  Alas.  The code from
            # here below is basically a copy of Host.wait_for_restart(),
            # with the logging bits ripped out, so that they can't cause
            # the failure logging problem described above.
            #
            # The black stain that this has left on my soul can never be
            # erased.
            old_boot_id = self.get_boot_id()
            if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
                                  warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
                                  old_boot_id=old_boot_id):
                raise error.AutoservHostError(
                        'servo host %s failed to shut down.' %
                        self.hostname)
            if self.wait_up(timeout=120):
                current_build_number = self._get_release_version()
                status = updater.check_update_status()
                logging.info('servo host %s back from reboot, with build %s',
                             self.hostname, current_build_number)
            else:
                raise error.AutoservHostError(
                        'servo host %s failed to come back from reboot.' %
                        self.hostname)
        return status, current_build_number


    def update_image(self, wait_for_update=False):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version.
          * If the host is processing a previously triggered update,
            do nothing.
          * If the host is running a version of Chrome OS different
            from the default for servo Hosts, trigger an update, but
            don't wait for it to complete.

        @param wait_for_update If an update needs to be applied and
            this is true, then don't return until the update is
            downloaded and finalized, and the host rebooted.
        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises autoupdater.ChromiumOSError: If something goes wrong in the
            checking update engine client status or applying an update.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.

        """
        # servod could be running in a Ubuntu workstation.
        if not self.is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        if lsbrelease_utils.is_moblab():
            logging.info('Not attempting an update, %s is running moblab.',
                         self.hostname)
            return

        target_build = afe_utils.get_stable_cros_image_name(self.get_board())
        target_build_number = server_site_utils.ParseBuildName(
                target_build)[3]
        # For servo image staging, we want it as widely distributed as
        # possible, so that devservers' load can be evenly distributed. So use
        # hostname instead of target_build as hash.
        ds = dev_server.ImageServer.resolve(self.hostname,
                                            hostname=self.hostname)
        url = ds.get_update_url(target_build)

        updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)
        status, current_build_number = self._check_for_reboot(updater)
        # Assume an update is in flight unless we establish below that the
        # host is already on the target build.
        update_pending = True
        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif current_build_number != target_build_number:
            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, target_build_number)
            try:
                ds.stage_artifacts(target_build,
                                   artifacts=['full_payload'])
            except Exception as e:
                # Staging is best effort; a failure only skips this cycle.
                logging.error('Staging artifacts failed: %s', str(e))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    # TODO(jrbarnette):  This 'touch' is a gross hack
                    # to get us past crbug.com/613603.  Once that
                    # bug is resolved, we should remove this code.
                    self.run('touch /home/chronos/.oobe_completed')
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as e:
                    trigger_download_status = 'failed with %s' % str(e)
                    metrics.Counter('chromeos/autotest/servo/'
                                    'rootfs_update_failed').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())
        else:
            logging.info('servo host %s does not require an update.',
                         self.hostname)
            update_pending = False

        if update_pending and wait_for_update:
            logging.info('Waiting for servo update to complete.')
            self.run('update_engine_client --follow', ignore_status=True)


    def verify(self, silent=False):
        """Update the servo host and verify it's in a good state.

        If verification raises, the servo is disconnected before the
        exception is propagated.

        @param silent   If true, suppress logging in `status.log`.
        """
        # TODO(jrbarnette) Old versions of beaglebone_servo include
        # the powerd package.  If you touch the .oobe_completed file
        # (as we do to work around an update_engine problem), then
        # powerd will eventually shut down the beaglebone for lack
        # of (apparent) activity.  Current versions of
        # beaglebone_servo don't have powerd, but until we can purge
        # the lab of the old images, we need to make sure powerd
        # isn't running.
        self.run('stop powerd', ignore_status=True)
        try:
            self._repair_strategy.verify(self, silent)
        except:
            # Clean up on any failure, then re-raise unchanged.
            self.disconnect_servo()
            raise


    def repair(self, silent=False):
        """Attempt to repair servo host.

        If repair raises, the servo is disconnected before the exception
        is propagated.

        @param silent   If true, suppress logging in `status.log`.
        """
        try:
            self._repair_strategy.repair(self, silent)
        except:
            # Clean up on any failure, then re-raise unchanged.
            self.disconnect_servo()
            raise


    def has_power(self):
        """Return whether or not the servo host is powered by PoE."""
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE if it is a lab device.

        @raises AutoservRepairError if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self.hostname, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise hosts.AutoservRepairError(
                        'Power cycling %s failed: %s' % (self.hostname, e))
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def get_servo(self):
        """Get the cached servo.Servo object.

        @return: a servo.Servo object, or None if `connect_servo()` has
                 not succeeded (or the servo was disconnected).
        """
        return self._servo


def make_servo_hostname(dut_hostname):
    """Given a DUT's hostname, return the hostname of its servo.

    The suffix '-servo' is appended to the first dot-separated component
    of the name; any remaining components are kept as they are.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    host_parts = dut_hostname.split('.')
    host_parts[0] = host_parts[0] + '-servo'
    return '.'.join(host_parts)


def servo_host_is_up(servo_hostname):
    """Given a servo host name, return if it's up or not.

    @param servo_hostname: hostname of the servo host.

    @return True if it's up, False otherwise
    """
    # Technically, this duplicates the SSH ping done early in the servo
    # proxy initialization code.  However, this ping ends in a couple
    # seconds when it fails, rather than the 60 seconds it takes to decide
    # that an SSH ping has timed out.  Specifically, that timeout happens
    # when our servo DNS name resolves, but there is no host at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    ping_config = ping_runner.PingConfig(
            servo_hostname, count=3,
            ignore_result=True, ignore_status=True)
    return ping_runner.PingRunner().ping(ping_config).received > 0


def _map_afe_board_to_servo_board(afe_board):
    """Map a board we get from the AFE to a servo appropriate value.

    Many boards are identical to other boards for servo's purposes.
    This function makes that mapping.  An explicit entry in the board map
    wins; otherwise the first matching known suffix is stripped.

    @param afe_board string board name received from AFE.
    @return board we expect servo to have.

    """
    KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
    BOARD_MAP = {'gizmo': 'panther'}
    mapped_board = afe_board
    if afe_board in BOARD_MAP:
        mapped_board = BOARD_MAP[afe_board]
    else:
        for suffix in KNOWN_SUFFIXES:
            if afe_board.endswith(suffix):
                mapped_board = afe_board[0:-len(suffix)]
                break
    if mapped_board != afe_board:
        logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
    return mapped_board


def _get_standard_servo_args(dut_host):
    """Return servo data associated with a given DUT.

    This checks for the presence of servo host and port attached to the
    given `dut_host`.  This data should be stored in the
    `_afe_host.attributes` field in the provided `dut_host` parameter.

    If the stored servo port is not an integer, the error is logged and
    no servo arguments are returned.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return A tuple of `servo_args` dict with host and an optional port,
            plus an `is_in_lab` flag indicating whether this in the CrOS
            test lab, or some different environment.  `servo_args` is
            None when no usable servo information was found.
    """
    servo_args = None
    is_in_lab = False
    is_ssp_moblab = False
    if utils.is_in_container():
        is_moblab = _CONFIG.get_config_value(
                'SSP', 'is_moblab', type=bool, default=False)
        is_ssp_moblab = is_moblab
    else:
        is_moblab = utils.is_moblab()
    attrs = dut_host._afe_host.attributes
    if attrs and SERVO_HOST_ATTR in attrs:
        servo_host = attrs[SERVO_HOST_ATTR]
        # In an SSP moblab container a loopback servo host is replaced by
        # the configured host container IP.
        if (is_ssp_moblab and servo_host in ['localhost', '127.0.0.1']):
            servo_host = _CONFIG.get_config_value(
                    'SSP', 'host_container_ip', type=str, default=None)
        servo_args = {SERVO_HOST_ATTR: servo_host}
        if SERVO_PORT_ATTR in attrs:
            try:
                servo_port = attrs[SERVO_PORT_ATTR]
                servo_args[SERVO_PORT_ATTR] = int(servo_port)
            except ValueError:
                logging.error('servo port is not an int: %s', servo_port)
                # Let's set the servo args to None since we're not creating
                # the ServoHost object with the proper port now.
                servo_args = None
        # servo_args may have just been discarded because of a bad port;
        # subscripting None here would raise TypeError.
        if servo_args is not None and SERVO_SERIAL_ATTR in attrs:
            servo_args[SERVO_SERIAL_ATTR] = attrs[SERVO_SERIAL_ATTR]
        is_in_lab = (not is_moblab
                     and utils.host_is_in_lab_zone(servo_host))

    # TODO(jrbarnette):  This test to use the default lab servo hostname
    # is a legacy that we need only until every host in the DB has
    # proper attributes.
    elif (not is_moblab and
          not dnsname_mangler.is_ip_address(dut_host.hostname)):
        servo_host = make_servo_hostname(dut_host.hostname)
        is_in_lab = utils.host_is_in_lab_zone(servo_host)
        if is_in_lab:
            servo_args = {SERVO_HOST_ATTR: servo_host}
    if servo_args is not None:
        info = dut_host.host_info_store.get()
        if info.board:
            servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(
                    info.board)
    return servo_args, is_in_lab


def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False):
    """Create a ServoHost object for a given DUT, if appropriate.

    This function attempts to create and verify or repair a `ServoHost`
    object for a servo connected to the given `dut`, subject to various
    constraints imposed by the parameters:
      * When the `servo_args` parameter is not `None`, a servo
        host must be created, and must be checked with `repair()`.
      * Otherwise, if a servo exists in the lab and `try_lab_servo` is
        true:
          * If `try_servo_repair` is true, then create a servo host and
            check it with `repair()`.
          * Otherwise, if the servo responds to `ping` then create a
            servo host and check it with `verify()`.

    In cases where `servo_args` was not `None`, repair failure
    exceptions are passed back to the caller; otherwise, exceptions
    are logged and then discarded.  Note that this only happens in cases
    where we're called from a test (not special task) control file that
    has an explicit dependency on servo.  In that case, we require that
    repair not write to `status.log`, so as to avoid polluting test
    results.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    Parameters for a servo host consist of a host name, port number, and
    DUT board, and are determined from one of these sources, in order of
    priority:
      * Servo attributes from the `dut` parameter take precedence over
        all other sources of information.
      * If a DNS entry for the servo based on the DUT hostname exists in
        the CrOS lab network, that hostname is used with the default
        port and the DUT's board.
      * If no other options are found, the parameters will be taken
        from the `servo_args` dict passed in from the caller.

    @param dut            An instance of `Host` from which to take
                          servo parameters (if available).
    @param servo_args     A dictionary with servo parameters to use if
                          they can't be found from `dut`.  If this
                          argument is supplied, unrepaired exceptions
                          from `verify()` will be passed back to the
                          caller.
    @param try_lab_servo  If not true, servo host creation will be
                          skipped unless otherwise required by the
                          caller.
    @param try_servo_repair  If true, check a servo host with
                          `repair()` instead of `verify()`.

    @returns: A ServoHost object or None. See comments above.

    """
    # A non-None `servo_args` from the caller means the test explicitly
    # depends on servo.
    servo_dependency = servo_args is not None
    is_in_lab = False
    if dut is not None and (try_lab_servo or servo_dependency):
        servo_args_override, is_in_lab = _get_standard_servo_args(dut)
        if servo_args_override is not None:
            servo_args = servo_args_override
    if servo_args is None:
        return None
    if (not servo_dependency and not try_servo_repair and
            not servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
        return None
    newhost = ServoHost(is_in_lab=is_in_lab, **servo_args)
    # Note that the logic of repair() includes everything done
    # by verify().  It's sufficient to call one or the other;
    # we don't need both.
    if servo_dependency:
        newhost.repair(silent=True)
    else:
        try:
            if try_servo_repair:
                newhost.repair()
            else:
                newhost.verify()
        except Exception:
            operation = 'repair' if try_servo_repair else 'verification'
            logging.exception('Servo %s failed for %s',
                              operation, newhost.hostname)
    return newhost