import glob
import logging
import os
import re
import shlex
import shutil
import socket
import subprocess
import tempfile
import time
import traceback

from multiprocessing import Lock
from autotest_lib.client.common_lib import autotemp, error
from autotest_lib.server import utils, autotest
from autotest_lib.server.hosts import host_info
from autotest_lib.server.hosts import remote
from autotest_lib.server.hosts import rpc_server_tracker
from autotest_lib.client.common_lib.global_config import global_config

# pylint: disable=C0111

get_value = global_config.get_config_value
enable_master_ssh = get_value('AUTOSERV', 'enable_master_ssh', type=bool,
                              default=False)


class AbstractSSHHost(remote.RemoteHost):
    """
    This class represents a generic implementation of most of the
    framework necessary for controlling a host via ssh. It implements
    almost all of the abstract Host methods, except for the core
    Host.run method.
    """
    VERSION_PREFIX = ''

    def _initialize(self, hostname, user="root", port=22, password="",
                    is_client_install_supported=True, afe_host=None,
                    host_info_store=None, *args, **dargs):
        """
        @param hostname: The hostname of the host.
        @param user: The username to use when ssh'ing into the host.
        @param password: The password to use when ssh'ing into the host.
        @param port: The port to use for ssh.
        @param is_client_install_supported: Boolean to indicate if we can
                install autotest on the host.
        @param afe_host: The host object attained from the AFE (get_hosts).
        @param host_info_store: Optional host_info.CachingHostInfoStore object
                to obtain / update host information.
        """
        super(AbstractSSHHost, self)._initialize(hostname=hostname,
                                                 *args, **dargs)
        # IP address is retrieved only on demand. Otherwise the host
        # initialization will fail for host is not online.
        self._ip = None
        self.user = user
        self.port = port
        self.password = password
        self._is_client_install_supported = is_client_install_supported
        self._use_rsync = None
        # mkstemp() hands back an already-open descriptor; only the path is
        # needed, so close the descriptor right away instead of leaking it.
        known_hosts_fd, self.known_hosts_file = tempfile.mkstemp()
        os.close(known_hosts_fd)
        self._rpc_server_tracker = rpc_server_tracker.RpcServerTracker(self)

        # Master SSH connection background job, socket temp directory and
        # socket control path option. If master-SSH is enabled, these fields
        # will be initialized by start_master_ssh when a new SSH connection
        # is initiated.
        self.master_ssh_job = None
        self.master_ssh_tempdir = None
        self.master_ssh_option = ''

        # Create a Lock to protect against race conditions.
        self._lock = Lock()

        self._afe_host = afe_host or utils.EmptyAFEHost()
        self.host_info_store = (host_info_store or
                                host_info.InMemoryHostInfoStore())


    @property
    def ip(self):
        """@return IP address of the host.

        The address is resolved on first access (first getaddrinfo result)
        and cached for subsequent accesses.
        """
        if not self._ip:
            self._ip = socket.getaddrinfo(self.hostname, None)[0][4][0]
        return self._ip


    @property
    def is_client_install_supported(self):
        """
        Returns True if the host supports autotest client installs, False
        otherwise.
        """
        return self._is_client_install_supported


    @property
    def rpc_server_tracker(self):
        """
        @return The RPC server tracker associated with this host.
        """
        return self._rpc_server_tracker


    def make_ssh_command(self, user="root", port=22, opts='',
                         hosts_file='/dev/null',
                         connect_timeout=30, alive_interval=300):
        """
        Build the ssh command line prefix (everything before the hostname).

        @param user: Login name passed to ssh via -l.
        @param port: Port passed to ssh via -p.
        @param opts: Extra ssh options inserted verbatim into the command.
        @param hosts_file: Path used for UserKnownHostsFile.
        @param connect_timeout: Value for ConnectTimeout; must be a positive
                integer.
        @param alive_interval: Value for ServerAliveInterval.

        @return The ssh command prefix as a string.
        """
        base_command = ("/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no "
                        "-o UserKnownHostsFile=%s -o BatchMode=yes "
                        "-o ConnectTimeout=%d -o ServerAliveInterval=%d "
                        "-l %s -p %d")
        assert isinstance(connect_timeout, (int, long))
        assert connect_timeout > 0  # can't disable the timeout
        return base_command % (opts, hosts_file, connect_timeout,
                               alive_interval, user, port)


    def use_rsync(self):
        """
        @return True if rsync should be used for file transfers. The remote
                availability check is performed once and then cached.
        """
        if self._use_rsync is not None:
            return self._use_rsync

        # Check if rsync is available on the remote host. If it's not,
        # don't try to use it for any future file transfers.
        self._use_rsync = self._check_rsync()
        if not self._use_rsync:
            logging.warning("rsync not available on remote host %s -- disabled",
                            self.hostname)
        return self._use_rsync


    def _check_rsync(self):
        """
        Check if rsync is available on the remote host.
        """
        try:
            self.run("rsync --version", stdout_tee=None, stderr_tee=None)
        except error.AutoservRunError:
            return False
        return True


    def _encode_remote_paths(self, paths, escape=True, use_scp=False):
        """
        Given a list of file paths, encodes it as a single remote path, in
        the style used by rsync and scp.
        escape: add \\ to protect special characters.
        use_scp: encode for scp if true, rsync if false.
        """
        if escape:
            paths = [utils.scp_remote_escape(path) for path in paths]

        remote_host = self.hostname

        # rsync and scp require IPv6 brackets, even when there isn't any
        # trailing port number (ssh doesn't support IPv6 brackets).
        # In the Python >= 3.3 future, 'import ipaddress' will parse addresses.
        if re.search(r':.*:', remote_host):
            remote_host = '[%s]' % remote_host

        if use_scp:
            return '%s@%s:"%s"' % (self.user, remote_host, " ".join(paths))
        else:
            return '%s@%s:%s' % (
                    self.user, remote_host,
                    " :".join('"%s"' % p for p in paths))


    def _encode_local_paths(self, paths, escape=True):
        """
        Given a list of file paths, encodes it as a single local path.
        escape: add \\ to protect special characters.
        """
        if escape:
            paths = [utils.sh_escape(path) for path in paths]

        return " ".join('"%s"' % p for p in paths)


    def _make_rsync_cmd(self, sources, dest, delete_dest,
                        preserve_symlinks, safe_symlinks):
        """
        Given a string of source paths and a destination path, produces the
        appropriate rsync command for copying them. Remote paths must be
        pre-encoded.
        """
        ssh_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                        opts=self.master_ssh_option,
                                        hosts_file=self.known_hosts_file)
        if delete_dest:
            delete_flag = "--delete"
        else:
            delete_flag = ""
        # safe_symlinks takes precedence over preserve_symlinks.
        if safe_symlinks:
            symlink_flag = "-l --safe-links"
        elif preserve_symlinks:
            symlink_flag = "-l"
        else:
            symlink_flag = "-L"
        command = ("rsync %s %s --timeout=1800 --rsh='%s' -az --no-o --no-g "
                   "%s \"%s\"")
        return command % (symlink_flag, delete_flag, ssh_cmd, sources, dest)


    def _make_ssh_cmd(self, cmd):
        """
        Create a base ssh command string for the host which can be used
        to run commands directly on the machine
        """
        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                         opts=self.master_ssh_option,
                                         hosts_file=self.known_hosts_file)

        return '%s %s "%s"' % (base_cmd, self.hostname, utils.sh_escape(cmd))


    def _make_scp_cmd(self, sources, dest):
        """
        Given a string of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded.
        """
        command = ("scp -rq %s -o StrictHostKeyChecking=no "
                   "-o UserKnownHostsFile=%s -P %d %s '%s'")
        return command % (self.master_ssh_option, self.known_hosts_file,
                          self.port, sources, dest)


    def _make_rsync_compatible_globs(self, path, is_local):
        """
        Given an rsync-style path, returns a list of globbed paths
        that will hopefully provide equivalent behaviour for scp. Does not
        support the full range of rsync pattern matching behaviour, only that
        exposed in the get/send_file interface (trailing slashes).

        The is_local param is flag indicating if the paths should be
        interpreted as local or remote paths.
        """

        # non-trailing slash paths should just work
        if not path.endswith("/"):
            return [path]

        # make a function to test if a pattern matches any files
        if is_local:
            def glob_matches_files(path, pattern):
                return bool(glob.glob(path + pattern))
        else:
            def glob_matches_files(path, pattern):
                result = self.run("ls \"%s\"%s" % (utils.sh_escape(path),
                                                   pattern),
                                  stdout_tee=None, ignore_status=True)
                return result.exit_status == 0

        # take a set of globs that cover all files, and see which are needed
        patterns = ["*", ".[!.]*"]
        patterns = [p for p in patterns if glob_matches_files(path, p)]

        # convert them into a set of paths suitable for the commandline
        if is_local:
            return ["\"%s\"%s" % (utils.sh_escape(path), pattern)
                    for pattern in patterns]
        else:
            return [utils.scp_remote_escape(path) + pattern
                    for pattern in patterns]


    def _make_rsync_compatible_source(self, source, is_local):
        """
        Applies the same logic as _make_rsync_compatible_globs, but
        applies it to an entire list of sources, producing a new list of
        sources, properly quoted.
        """
        return [globbed
                for path in source
                for globbed in self._make_rsync_compatible_globs(path,
                                                                 is_local)]


    def _set_umask_perms(self, dest):
        """
        Given a destination file/dir (recursively) set the permissions on
        all the files and directories to the max allowed by running umask.
        """

        # now this looks strange but I haven't found a way in Python to _just_
        # get the umask, apparently the only option is to try to set it
        umask = os.umask(0)
        os.umask(umask)

        max_privs = 0o777 & ~umask

        def set_file_privs(filename):
            """Sets mode of |filename|.  Assumes |filename| exists."""
            file_stat = os.stat(filename)

            file_privs = max_privs
            # if the original file permissions do not have at least one
            # executable bit then do not set it anywhere
            if not file_stat.st_mode & 0o111:
                file_privs &= ~0o111

            os.chmod(filename, file_privs)

        # try a bottom-up walk so changes on directory permissions won't cut
        # our access to the files/directories inside it
        for root, dirs, files in os.walk(dest, topdown=False):
            # when setting the privileges we emulate the chmod "X" behaviour
            # that sets to execute only if it is a directory or any of the
            # owner/group/other already has execute right
            for dirname in dirs:
                os.chmod(os.path.join(root, dirname), max_privs)

            # Filter out broken symlinks as we go. os.walk() yields bare
            # names, so the existence test must be done on the joined path
            # rather than relative to the current working directory.
            for filename in files:
                file_path = os.path.join(root, filename)
                if os.path.exists(file_path):
                    set_file_privs(file_path)

        # now set privs for the dest itself
        if os.path.isdir(dest):
            os.chmod(dest, max_privs)
        else:
            set_file_privs(dest)


    def get_file(self, source, dest, delete_dest=False, preserve_perm=True,
                 preserve_symlinks=False, retry=True, safe_symlinks=False):
        """
        Copy files from the remote host to a local path.

        Directories will be copied recursively.
        If a source component is a directory with a trailing slash,
        the content of the directory will be copied, otherwise, the
        directory itself and its content will be copied. This
        behavior is similar to that of the program 'rsync'.

        Args:
                source: either
                        1) a single file or directory, as a string
                        2) a list of one or more (possibly mixed)
                                files or directories
                dest: a file or a directory (if source contains a
                        directory or more than one element, you must
                        supply a directory dest)
                delete_dest: if this is true, the command will also clear
                             out any old files at dest that are not in the
                             source
                preserve_perm: tells get_file() to try to preserve the sources
                               permissions on files and dirs
                preserve_symlinks: try to preserve symlinks instead of
                                   transforming them into files/dirs on copy
                retry: if true, a failed rsync is retried once when its exit
                       status suggests a transient network problem
                safe_symlinks: same as preserve_symlinks, but discard links
                               that may point outside the copied tree
        Raises:
                AutoservRunError: the scp command failed
        """
        logging.debug('get_file. source: %s, dest: %s, delete_dest: %s,'
                      'preserve_perm: %s, preserve_symlinks:%s', source, dest,
                      delete_dest, preserve_perm, preserve_symlinks)
        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        if isinstance(source, basestring):
            source = [source]
        dest = os.path.abspath(dest)

        # If rsync is disabled or fails, try scp.
        try_scp = True
        if self.use_rsync():
            logging.debug('Using Rsync.')
            try:
                remote_source = self._encode_remote_paths(source)
                local_dest = utils.sh_escape(dest)
                rsync = self._make_rsync_cmd(remote_source, local_dest,
                                             delete_dest, preserve_symlinks,
                                             safe_symlinks)
                utils.run(rsync)
                try_scp = False
            except error.CmdError as e:
                # retry on rsync exit values which may be caused by transient
                # network problems:
                #
                # rc 10: Error in socket I/O
                # rc 12: Error in rsync protocol data stream
                # rc 23: Partial transfer due to error
                # rc 255: Ssh error
                #
                # Note that rc 23 includes dangling symlinks.  In this case
                # retrying is useless, but not very damaging since rsync checks
                # for those before starting the transfer (scp does not).
                status = e.result_obj.exit_status
                if status in [10, 12, 23, 255] and retry:
                    logging.warning('rsync status %d, retrying', status)
                    # Forward safe_symlinks too, so the retry performs the
                    # same transfer as the first attempt.
                    self.get_file(source, dest, delete_dest, preserve_perm,
                                  preserve_symlinks, retry=False,
                                  safe_symlinks=safe_symlinks)
                    # The nested get_file() does all that's needed.
                    return
                else:
                    logging.warning("trying scp, rsync failed: %s (%d)",
                                    e, status)

        if try_scp:
            logging.debug('Trying scp.')
            # scp has no equivalent to --delete, just drop the entire dest dir
            if delete_dest and os.path.isdir(dest):
                shutil.rmtree(dest)
                os.mkdir(dest)

            remote_source = self._make_rsync_compatible_source(source, False)
            if remote_source:
                # _make_rsync_compatible_source() already did the escaping
                remote_source = self._encode_remote_paths(
                        remote_source, escape=False, use_scp=True)
                local_dest = utils.sh_escape(dest)
                scp = self._make_scp_cmd(remote_source, local_dest)
                try:
                    utils.run(scp)
                except error.CmdError as e:
                    logging.debug('scp failed: %s', e)
                    raise error.AutoservRunError(e.args[0], e.args[1])

        if not preserve_perm:
            # we have no way to tell scp to not try to preserve the
            # permissions so set them after copy instead.
            # for rsync we could use "--no-p --chmod=ugo=rwX" but those
            # options are only in very recent rsync versions
            self._set_umask_perms(dest)


    def send_file(self, source, dest, delete_dest=False,
                  preserve_symlinks=False):
        """
        Copy files from a local path to the remote host.

        Directories will be copied recursively.
        If a source component is a directory with a trailing slash,
        the content of the directory will be copied, otherwise, the
        directory itself and its content will be copied. This
        behavior is similar to that of the program 'rsync'.

        Args:
                source: either
                        1) a single file or directory, as a string
                        2) a list of one or more (possibly mixed)
                                files or directories
                dest: a file or a directory (if source contains a
                        directory or more than one element, you must
                        supply a directory dest)
                delete_dest: if this is true, the command will also clear
                             out any old files at dest that are not in the
                             source
                preserve_symlinks: controls if symlinks on the source will be
                    copied as such on the destination or transformed into the
                    referenced file/directory

        Raises:
                AutoservRunError: the scp command failed
                TestError: the source list is empty or contains a NUL char
        """
        logging.debug('send_file. source: %s, dest: %s, delete_dest: %s,'
                      'preserve_symlinks:%s', source, dest,
                      delete_dest, preserve_symlinks)
        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        if isinstance(source, basestring):
            source = [source]

        local_sources = self._encode_local_paths(source)
        if not local_sources:
            raise error.TestError('source |%s| yielded an empty string' % (
                source))
        if '\x00' in local_sources:
            raise error.TestError('one or more sources include NUL char')

        # If rsync is disabled or fails, try scp.
        try_scp = True
        if self.use_rsync():
            logging.debug('Using Rsync.')
            remote_dest = self._encode_remote_paths([dest])
            try:
                rsync = self._make_rsync_cmd(local_sources, remote_dest,
                                             delete_dest, preserve_symlinks,
                                             False)
                utils.run(rsync)
                try_scp = False
            except error.CmdError as e:
                logging.warning("trying scp, rsync failed: %s", e)

        if try_scp:
            logging.debug('Trying scp.')
            # scp has no equivalent to --delete, just drop the entire dest dir
            if delete_dest:
                # NOTE(review): dest is interpolated into the remote shell
                # command unescaped; callers must pass a shell-safe path.
                is_dir = self.run("ls -d %s/" % dest,
                                  ignore_status=True).exit_status == 0
                if is_dir:
                    cmd = "rm -rf %s && mkdir %s"
                    cmd %= (dest, dest)
                    self.run(cmd)

            remote_dest = self._encode_remote_paths([dest], use_scp=True)
            local_sources = self._make_rsync_compatible_source(source, True)
            if local_sources:
                sources = self._encode_local_paths(local_sources, escape=False)
                scp = self._make_scp_cmd(sources, remote_dest)
                try:
                    utils.run(scp)
                except error.CmdError as e:
                    logging.debug('scp failed: %s', e)
                    raise error.AutoservRunError(e.args[0], e.args[1])
            else:
                logging.debug('skipping scp for empty source list')


    def verify_ssh_user_access(self):
        """Verify ssh access to this host.

        @returns False if ssh_ping fails due to Permissions error, True
                 otherwise.
        """
        try:
            self.ssh_ping()
        except (error.AutoservSshPermissionDeniedError,
                error.AutoservSshPingHostError):
            return False
        return True


    def ssh_ping(self, timeout=60, connect_timeout=None, base_cmd='true'):
        """
        Pings remote host via ssh.

        @param timeout: Time in seconds before giving up.
                        Defaults to 60 seconds.
        @param connect_timeout: Connection timeout in seconds. It is capped
                                at |timeout|; when not given, |timeout| is
                                used.
        @param base_cmd: The base command to run with the ssh ping.
                         Defaults to true.
        @raise AutoservSSHTimeout: If the ssh ping times out.
        @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                                 permissions.
        @raise AutoservSshPingHostError: For other AutoservRunErrors.
        """
        ctimeout = min(timeout, connect_timeout or timeout)
        try:
            self.run(base_cmd, timeout=timeout, connect_timeout=ctimeout,
                     ssh_failure_retry_ok=True)
        except error.AutoservSSHTimeout:
            msg = "Host (ssh) verify timed out (timeout = %d)" % timeout
            raise error.AutoservSSHTimeout(msg)
        except error.AutoservSshPermissionDeniedError:
            # let AutoservSshPermissionDeniedError be visible to the callers
            raise
        except error.AutoservRunError as e:
            # convert the generic AutoservRunError into something more
            # specific for this context
            raise error.AutoservSshPingHostError(e.description + '\n' +
                                                 repr(e.result_obj))


    def is_up(self, timeout=60, connect_timeout=None, base_cmd='true'):
        """
        Check if the remote host is up by ssh-ing and running a base command.

        @param timeout: timeout in seconds.
        @param connect_timeout: connection timeout in seconds, forwarded to
                                ssh_ping.
        @param base_cmd: a base command to run with ssh. The default is 'true'.
        @returns True if the remote host is up before the timeout expires,
                 False otherwise.
        """
        try:
            self.ssh_ping(timeout=timeout,
                          connect_timeout=connect_timeout,
                          base_cmd=base_cmd)
        except error.AutoservError:
            return False
        else:
            return True


    def wait_up(self, timeout=None):
        """
        Wait until the remote host is up or the timeout expires.

        In fact, it will wait until an ssh connection to the remote
        host can be established, and getty is running.

        @param timeout time limit in seconds before returning even
            if the host is not up. When not set, wait indefinitely.

        @returns True if the host was found to be up before the timeout expires,
                 False otherwise
        """
        current_time = int(time.time())
        # end_time is None when there is no deadline (wait forever).
        end_time = (current_time + timeout) if timeout else None

        autoserv_error_logged = False
        while end_time is None or current_time < end_time:
            if end_time is None:
                # No overall deadline: rely on is_up()'s default timeout for
                # each individual probe.
                host_is_up = self.is_up(connect_timeout=20)
            else:
                host_is_up = self.is_up(timeout=end_time - current_time,
                                        connect_timeout=20)
            if host_is_up:
                try:
                    if self.are_wait_up_processes_up():
                        logging.debug('Host %s is now up', self.hostname)
                        return True
                except error.AutoservError as e:
                    if not autoserv_error_logged:
                        logging.debug('Ignoring failure to reach %s: %s %s',
                                      self.hostname, e,
                                      '(and further similar failures)')
                        autoserv_error_logged = True
            time.sleep(1)
            current_time = int(time.time())

        # Only reachable when a deadline was set and has passed.
        logging.debug('Host %s is still down after waiting %d seconds',
                      self.hostname, int(timeout + time.time() - end_time))
        return False


    def wait_down(self, timeout=None, warning_timer=None, old_boot_id=None):
        """
        Wait until the remote host is down or the timeout expires.

        If old_boot_id is provided, this will wait until either the machine
        is unpingable or self.get_boot_id() returns a value different from
        old_boot_id. If the boot_id value has changed then the function
        returns true under the assumption that the machine has shut down
        and has now already come back up.

        If old_boot_id is None then until the machine becomes unreachable the
        method assumes the machine has not yet shut down.

        Based on this definition, the 4 possible permutations of timeout
        and old_boot_id are:
        1. timeout and old_boot_id: wait timeout seconds for either the
                                    host to become unpingable, or the boot id
                                    to change. In the latter case we've rebooted
                                    and in the former case we've only shutdown,
                                    but both cases return True.
        2. only timeout: wait timeout seconds for the host to become unpingable.
                         If the host remains pingable throughout timeout seconds
                         we return False.
        3. only old_boot_id: wait forever until either the host becomes
                             unpingable or the boot_id changes. Return true
                             when either of those conditions are met.
        4. not timeout, not old_boot_id: wait forever till the host becomes
                                         unpingable.

        @param timeout Time limit in seconds before returning even
            if the host is still up.
        @param warning_timer Time limit in seconds that will generate
            a warning if the host is not down yet.
        @param old_boot_id A string containing the result of self.get_boot_id()
            prior to the host being told to shut down. Can be None if this is
            not available.

        @returns True if the host was found to be down, False otherwise
        """
        #TODO: there is currently no way to distinguish between knowing
        #TODO: boot_id was unsupported and not knowing the boot_id.
        current_time = int(time.time())
        # end_time is None when there is no deadline (wait forever).
        end_time = (current_time + timeout) if timeout else None

        if warning_timer:
            warn_time = current_time + warning_timer

        if old_boot_id is not None:
            logging.debug('Host %s pre-shutdown boot_id is %s',
                          self.hostname, old_boot_id)

        # Impose semi real-time deadline constraints, since some clients
        # (eg: watchdog timer tests) expect strict checking of time elapsed.
        # Each iteration of this loop is treated as though it atomically
        # completes within current_time, this is needed because if we used
        # inline time.time() calls instead then the following could happen:
        #
        # while not timeout or time.time() < end_time:      [23 < 30]
        #    some code.                                     [takes 10 secs]
        #    try:
        #        new_boot_id = self.get_boot_id(timeout=end_time - time.time())
        #                                                   [30 - 33]
        # The last step will lead to a return True, when in fact the machine
        # went down at 32 seconds (>30). Hence we need to pass get_boot_id
        # the same time that allowed us into that iteration of the loop.
        while end_time is None or current_time < end_time:
            try:
                if end_time is None:
                    # No overall deadline: let get_boot_id() use its own
                    # default timeout for each probe.
                    new_boot_id = self.get_boot_id()
                else:
                    new_boot_id = self.get_boot_id(
                            timeout=end_time - current_time)
            except error.AutoservError:
                logging.debug('Host %s is now unreachable over ssh, is down',
                              self.hostname)
                return True
            else:
                # if the machine is up but the boot_id value has changed from
                # old boot id, then we can assume the machine has gone down
                # and then already come back up
                if old_boot_id is not None and old_boot_id != new_boot_id:
                    logging.debug('Host %s now has boot_id %s and so must '
                                  'have rebooted', self.hostname, new_boot_id)
                    return True

            if warning_timer and current_time > warn_time:
                self.record("INFO", None, "shutdown",
                            "Shutdown took longer than %ds" % warning_timer)
                # Print the warning only once.
                warning_timer = None
                # If a machine is stuck switching runlevels
                # This may cause the machine to reboot.
                self.run('kill -HUP 1', ignore_status=True)

            time.sleep(1)
            current_time = int(time.time())

        return False


    # tunable constants for the verify & repair code
    AUTOTEST_GB_DISKSPACE_REQUIRED = get_value("SERVER",
                                               "gb_diskspace_required",
                                               type=float,
                                               default=20.0)


    def verify_connectivity(self):
        """
        Verify the host answers an ssh ping and is not shutting down.

        @raise AutoservHostIsShuttingDownError: If is_shutting_down() reports
                that the host is going down.
        """
        super(AbstractSSHHost, self).verify_connectivity()

        logging.info('Pinging host %s', self.hostname)
        self.ssh_ping()
        logging.info("Host (ssh) %s is alive", self.hostname)

        if self.is_shutting_down():
            raise error.AutoservHostIsShuttingDownError("Host is shutting down")


    def verify_software(self):
        """
        Check that the autotest install directory has enough free disk space.

        A missing autotest directory is logged and ignored; an
        AutoservHostError from the disk space check is propagated.
        """
        super(AbstractSSHHost, self).verify_software()
        try:
            self.check_diskspace(autotest.Autotest.get_install_dir(self),
                                 self.AUTOTEST_GB_DISKSPACE_REQUIRED)
        except error.AutoservHostError:
            raise           # only want to raise if it's a space issue
        except autotest.AutodirNotFoundError:
            # autotest dir may not exist, etc. ignore
            logging.debug('autodir space check exception, this is probably '
                          'safe to ignore\n%s', traceback.format_exc())


    def close(self):
        """
        Release the resources held for this host: RPC server connections,
        the master SSH connection and the temporary known_hosts file.
        """
        super(AbstractSSHHost, self).close()
        self.rpc_server_tracker.disconnect_all()
        self._cleanup_master_ssh()
        if os.path.exists(self.known_hosts_file):
            os.remove(self.known_hosts_file)


    def restart_master_ssh(self):
        """
        Stop and restart the ssh master connection.  This is meant as a last
        resort when ssh commands fail and we don't understand why.
        """
        logging.debug('Restarting master ssh connection')
        self._cleanup_master_ssh()
        self.start_master_ssh(timeout=30)


    def _cleanup_master_ssh(self):
        """
        Release all resources (process, temporary directory) used by an active
        master SSH connection.
        """
        # If a master SSH connection is running, kill it.
        if self.master_ssh_job is not None:
            logging.debug('Nuking master_ssh_job.')
            utils.nuke_subprocess(self.master_ssh_job.sp)
            self.master_ssh_job = None

        # Remove the temporary directory for the master SSH socket.
        if self.master_ssh_tempdir is not None:
            logging.debug('Cleaning master_ssh_tempdir.')
            self.master_ssh_tempdir.clean()
            self.master_ssh_tempdir = None
            self.master_ssh_option = ''


    def start_master_ssh(self, timeout=5):
        """
        Called whenever a slave SSH connection needs to be initiated (e.g., by
        run, rsync, scp). If master SSH support is enabled and a master SSH
        connection is not active already, start a new one in the background.
        Also, cleanup any zombie master SSH connections (e.g., dead due to
        reboot).

        timeout: timeout in seconds (default 5) to wait for master ssh
                 connection to be established. If timeout is reached, a
                 warning message is logged, but no other action is taken.
        """
        if not enable_master_ssh:
            return

        # Multiple processes might try in parallel to clean up the old master
        # ssh connection and create a new one, therefore use a lock to protect
        # against race conditions.
        with self._lock:
            # If a previously started master SSH connection is not running
            # anymore, it needs to be cleaned up and then restarted.
            if self.master_ssh_job is not None:
                socket_path = os.path.join(self.master_ssh_tempdir.name,
                                           'socket')
                if (not os.path.exists(socket_path) or
                        self.master_ssh_job.sp.poll() is not None):
                    logging.info("Master ssh connection to %s is down.",
                                 self.hostname)
                    self._cleanup_master_ssh()

            # Start a new master SSH connection.
            if self.master_ssh_job is None:
                # Create a shared socket in a temp location.
                self.master_ssh_tempdir = autotemp.tempdir(
                        unique_id='ssh-master')
                self.master_ssh_option = ("-o ControlPath=%s/socket" %
                                          self.master_ssh_tempdir.name)

                # Start the master SSH connection in the background.
                master_cmd = self.ssh_command(
                        options="-N -o ControlMaster=yes")
                logging.info("Starting master ssh connection '%s'", master_cmd)
                self.master_ssh_job = utils.BgJob(master_cmd,
                                                  nickname='master-ssh',
                                                  no_pipes=True)
                # To prevent a race between the master ssh connection
                # startup and its first attempted use, wait for socket file to
                # exist before returning.
                end_time = time.time() + timeout
                socket_file_path = os.path.join(self.master_ssh_tempdir.name,
                                                'socket')
                while time.time() < end_time:
                    if os.path.exists(socket_file_path):
                        break
                    time.sleep(.2)
                else:
                    # Loop ran to the deadline without seeing the socket.
                    logging.info('Timed out waiting for master-ssh connection '
                                 'to be established.')


    def clear_known_hosts(self):
        """Clears out the temporary ssh known_hosts file.

        This is useful if the test SSHes to the machine, then reinstalls it,
        then SSHes to it again.  It can be called after the reinstall to
        reduce the spam in the logs.
        """
        logging.info("Clearing known hosts for host '%s', file '%s'.",
                     self.hostname, self.known_hosts_file)
        # Clear out the file by opening it for writing and then closing.
        with open(self.known_hosts_file, "w"):
            pass


    def collect_logs(self, remote_src_dir, local_dest_dir, ignore_errors=True):
        """Copy log directories from a host to a local directory.

        @param remote_src_dir: A source directory on the host.
        @param local_dest_dir: A path to a local destination directory.
            If it doesn't exist it will be created.
        @param ignore_errors: If True, ignore exceptions.

        @raises OSError: If there were problems creating the local_dest_dir and
            ignore_errors is False.
        @raises AutoservRunError, AutotestRunError: If something goes wrong
            while copying the directories and ignore_errors is False.
        """
        locally_created_dest = False
        if not os.path.isdir(local_dest_dir):
            try:
                os.makedirs(local_dest_dir)
                locally_created_dest = True
            except OSError as e:
                logging.warning('Unable to collect logs from host '
                                '%s: %s', self.hostname, e)
                if not ignore_errors:
                    raise
                return
        try:
            self.get_file(remote_src_dir, local_dest_dir, safe_symlinks=True)
        except (error.AutotestRunError, error.AutoservRunError,
                error.AutoservSSHTimeout) as e:
            logging.warning('Collection of %s to local dir %s from host %s '
                            'failed: %s', remote_src_dir, local_dest_dir,
                            self.hostname, e)
            # Only remove the destination if this call created it.
            if locally_created_dest:
                shutil.rmtree(local_dest_dir, ignore_errors=ignore_errors)
            if not ignore_errors:
                raise


    def create_ssh_tunnel(self, port, local_port):
        """Create an ssh tunnel from local_port to port.

        This is used to forward a port securely through a tunnel process from
        the server to the DUT for RPC server connection.

        @param port: remote port on the host.
        @param local_port: local forwarding port.

        @return: the tunnel process.
        """
        tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
        # Connect with this host's configured user and ssh port rather than
        # make_ssh_command()'s defaults, like the other command builders do.
        ssh_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                        opts=tunnel_options)
        tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
        logging.debug('Full tunnel command: %s', tunnel_cmd)
        # Exec the ssh process directly here rather than using a shell.
        # Using a shell leaves a dangling ssh process, because we deliver
        # signals to the shell wrapping ssh, not the ssh process itself.
        args = shlex.split(tunnel_cmd)
        tunnel_proc = subprocess.Popen(args, close_fds=True)
        logging.debug('Started ssh tunnel, local = %d'
                      ' remote = %d, pid = %d',
                      local_port, port, tunnel_proc.pid)
        return tunnel_proc


    def disconnect_ssh_tunnel(self, tunnel_proc, port):
        """
        Disconnects a previously forwarded port from the server to the DUT for
        RPC server connection.

        @param tunnel_proc: a tunnel process returned from |create_ssh_tunnel|.
        @param port: remote port on the DUT, used in ADBHost.

        """
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
            logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
        else:
            logging.debug('Tunnel pid %d terminated early, status %d',
                          tunnel_proc.pid, tunnel_proc.returncode)


    def get_os_type(self):
        """Returns the host OS descriptor (to be implemented in subclasses).

        @return A string describing the OS type.
        """
        raise NotImplementedError