1import os, time, socket, shutil, glob, logging, tempfile, re
2import shlex
3import subprocess
4
5from autotest_lib.client.bin.result_tools import runner as result_tools_runner
6from autotest_lib.client.common_lib import error
7from autotest_lib.client.common_lib.cros.network import ping_runner
8from autotest_lib.client.common_lib.global_config import global_config
9from autotest_lib.server import utils, autotest
10from autotest_lib.server.hosts import host_info
11from autotest_lib.server.hosts import remote
12from autotest_lib.server.hosts import rpc_server_tracker
13from autotest_lib.server.hosts import ssh_multiplex
14
15# pylint: disable=C0111
16
# Shorthand for reading settings from the global autotest configuration.
get_value = global_config.get_config_value
# Value of the 'enable_master_ssh' key in the AUTOSERV config section, read
# as a bool; default=False is passed as the fallback.
enable_master_ssh = get_value('AUTOSERV', 'enable_master_ssh', type=bool,
                              default=False)

# Number of seconds to use the cached up status.
_DEFAULT_UP_STATUS_EXPIRATION_SECONDS = 300
# Port used for ssh when the caller does not supply one.
_DEFAULT_SSH_PORT = 22

# Number of seconds to wait for the host to shut down in wait_down().
_DEFAULT_WAIT_DOWN_TIME_SECONDS = 120

# Number of seconds to wait for the host to boot up in wait_up().
_DEFAULT_WAIT_UP_TIME_SECONDS = 120

# Timeout in seconds for a single call of get_boot_id() in wait_down()
# and a single ssh ping in wait_up().
_DEFAULT_MAX_PING_TIMEOUT = 10
34
35class AbstractSSHHost(remote.RemoteHost):
36    """
37    This class represents a generic implementation of most of the
38    framework necessary for controlling a host via ssh. It implements
39    almost all of the abstract Host methods, except for the core
40    Host.run method.
41    """
42    VERSION_PREFIX = ''
43    # Timeout for master ssh connection setup, in seconds.
44    DEFAULT_START_MASTER_SSH_TIMEOUT_S = 5
45
46    def _initialize(self, hostname, user="root", port=_DEFAULT_SSH_PORT,
47                    password="", is_client_install_supported=True,
48                    afe_host=None, host_info_store=None, connection_pool=None,
49                    *args, **dargs):
50        super(AbstractSSHHost, self)._initialize(hostname=hostname,
51                                                 *args, **dargs)
52        """
53        @param hostname: The hostname of the host.
54        @param user: The username to use when ssh'ing into the host.
55        @param password: The password to use when ssh'ing into the host.
56        @param port: The port to use for ssh.
57        @param is_client_install_supported: Boolean to indicate if we can
58                install autotest on the host.
59        @param afe_host: The host object attained from the AFE (get_hosts).
60        @param host_info_store: Optional host_info.CachingHostInfoStore object
61                to obtain / update host information.
62        @param connection_pool: ssh_multiplex.ConnectionPool instance to share
63                the master ssh connection across control scripts.
64        """
65        # IP address is retrieved only on demand. Otherwise the host
66        # initialization will fail for host is not online.
67        self._ip = None
68        self.user = user
69        self.port = port
70        self.password = password
71        self._is_client_install_supported = is_client_install_supported
72        self._use_rsync = None
73        self.known_hosts_file = tempfile.mkstemp()[1]
74        self._rpc_server_tracker = rpc_server_tracker.RpcServerTracker(self);
75
76        """
77        Master SSH connection background job, socket temp directory and socket
78        control path option. If master-SSH is enabled, these fields will be
79        initialized by start_master_ssh when a new SSH connection is initiated.
80        """
81        self._connection_pool = connection_pool
82        if connection_pool:
83            self._master_ssh = connection_pool.get(hostname, user, port)
84        else:
85            self._master_ssh = ssh_multiplex.MasterSsh(hostname, user, port)
86
87        self._afe_host = afe_host or utils.EmptyAFEHost()
88        self.host_info_store = (host_info_store or
89                                host_info.InMemoryHostInfoStore())
90
91        # The cached status of whether the DUT responded to ping.
92        self._cached_up_status = None
93        # The timestamp when the value of _cached_up_status is set.
94        self._cached_up_status_updated = None
95
96
97    @property
98    def ip(self):
99        """@return IP address of the host.
100        """
101        if not self._ip:
102            self._ip = socket.getaddrinfo(self.hostname, None)[0][4][0]
103        return self._ip
104
105
106    @property
    def is_client_install_supported(self):
        """
        Returns True if the host supports autotest client installs, False
        otherwise.

        The value is the is_client_install_supported flag that was passed
        to _initialize().
        """
        return self._is_client_install_supported
113
114
115    @property
    def rpc_server_tracker(self):
        """
        @return The RPC server tracker associated with this host (the
                RpcServerTracker object created in _initialize()).
        """
        return self._rpc_server_tracker
121
122
123    @property
124    def is_default_port(self):
125      """Returns True if its port is default SSH port."""
126      return self.port == _DEFAULT_SSH_PORT
127
128    @property
129    def host_port(self):
130        """Returns hostname if port is default. Otherwise, hostname:port.
131        """
132        if self.is_default_port:
133            return self.hostname
134        else:
135            return '%s:%d' % (self.hostname, self.port)
136
137
138    # Though it doesn't use self here, it is not declared as staticmethod
139    # because its subclass may use self to access member variables.
140    def make_ssh_command(self, user="root", port=_DEFAULT_SSH_PORT, opts='',
141                         hosts_file='/dev/null', connect_timeout=30,
142                         alive_interval=300, alive_count_max=3,
143                         connection_attempts=1):
144        ssh_options = " ".join([
145            opts,
146            self.make_ssh_options(
147                hosts_file=hosts_file, connect_timeout=connect_timeout,
148                alive_interval=alive_interval, alive_count_max=alive_count_max,
149                connection_attempts=connection_attempts)])
150        return "/usr/bin/ssh -a -x %s -l %s -p %d" % (ssh_options, user, port)
151
152
153    @staticmethod
154    def make_ssh_options(hosts_file='/dev/null', connect_timeout=30,
155                         alive_interval=300, alive_count_max=3,
156                         connection_attempts=1):
157        """Composes SSH -o options."""
158        assert isinstance(connect_timeout, (int, long))
159        assert connect_timeout > 0 # can't disable the timeout
160
161        options = [("StrictHostKeyChecking", "no"),
162                   ("UserKnownHostsFile", hosts_file),
163                   ("BatchMode", "yes"),
164                   ("ConnectTimeout", str(connect_timeout)),
165                   ("ServerAliveInterval", str(alive_interval)),
166                   ("ServerAliveCountMax", str(alive_count_max)),
167                   ("ConnectionAttempts", str(connection_attempts))]
168        return " ".join("-o %s=%s" % kv for kv in options)
169
170
171    def use_rsync(self):
172        if self._use_rsync is not None:
173            return self._use_rsync
174
175        # Check if rsync is available on the remote host. If it's not,
176        # don't try to use it for any future file transfers.
177        self._use_rsync = self.check_rsync()
178        if not self._use_rsync:
179            logging.warning("rsync not available on remote host %s -- disabled",
180                            self.host_port)
181        return self._use_rsync
182
183
184    def check_rsync(self):
185        """
186        Check if rsync is available on the remote host.
187        """
188        try:
189            self.run("rsync --version", stdout_tee=None, stderr_tee=None)
190        except error.AutoservRunError:
191            return False
192        return True
193
194
195    def _encode_remote_paths(self, paths, escape=True, use_scp=False):
196        """
197        Given a list of file paths, encodes it as a single remote path, in
198        the style used by rsync and scp.
199        escape: add \\ to protect special characters.
200        use_scp: encode for scp if true, rsync if false.
201        """
202        if escape:
203            paths = [utils.scp_remote_escape(path) for path in paths]
204
205        remote = self.hostname
206
207        # rsync and scp require IPv6 brackets, even when there isn't any
208        # trailing port number (ssh doesn't support IPv6 brackets).
209        # In the Python >= 3.3 future, 'import ipaddress' will parse addresses.
210        if re.search(r':.*:', remote):
211            remote = '[%s]' % remote
212
213        if use_scp:
214            return '%s@%s:"%s"' % (self.user, remote, " ".join(paths))
215        else:
216            return '%s@%s:%s' % (
217                    self.user, remote,
218                    " :".join('"%s"' % p for p in paths))
219
220    def _encode_local_paths(self, paths, escape=True):
221        """
222        Given a list of file paths, encodes it as a single local path.
223        escape: add \\ to protect special characters.
224        """
225        if escape:
226            paths = [utils.sh_escape(path) for path in paths]
227
228        return " ".join('"%s"' % p for p in paths)
229
230
231    def rsync_options(self, delete_dest=False, preserve_symlinks=False,
232                      safe_symlinks=False, excludes=None):
233        """Obtains rsync options for the remote."""
234        ssh_cmd = self.make_ssh_command(user=self.user, port=self.port,
235                                        opts=self._master_ssh.ssh_option,
236                                        hosts_file=self.known_hosts_file)
237        if delete_dest:
238            delete_flag = "--delete"
239        else:
240            delete_flag = ""
241        if safe_symlinks:
242            symlink_flag = "-l --safe-links"
243        elif preserve_symlinks:
244            symlink_flag = "-l"
245        else:
246            symlink_flag = "-L"
247        exclude_args = ''
248        if excludes:
249            exclude_args = ' '.join(
250                    ["--exclude '%s'" % exclude for exclude in excludes])
251        return "%s %s --timeout=1800 --rsh='%s' -az --no-o --no-g %s" % (
252            symlink_flag, delete_flag, ssh_cmd, exclude_args)
253
254
255    def _make_rsync_cmd(self, sources, dest, delete_dest,
256                        preserve_symlinks, safe_symlinks, excludes=None):
257        """
258        Given a string of source paths and a destination path, produces the
259        appropriate rsync command for copying them. Remote paths must be
260        pre-encoded.
261        """
262        rsync_options = self.rsync_options(
263            delete_dest=delete_dest, preserve_symlinks=preserve_symlinks,
264            safe_symlinks=safe_symlinks, excludes=excludes)
265        return 'rsync %s %s "%s"' % (rsync_options, sources, dest)
266
267
268    def _make_ssh_cmd(self, cmd):
269        """
270        Create a base ssh command string for the host which can be used
271        to run commands directly on the machine
272        """
273        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
274                                         opts=self._master_ssh.ssh_option,
275                                         hosts_file=self.known_hosts_file)
276
277        return '%s %s "%s"' % (base_cmd, self.hostname, utils.sh_escape(cmd))
278
279    def _make_scp_cmd(self, sources, dest):
280        """
281        Given a string of source paths and a destination path, produces the
282        appropriate scp command for encoding it. Remote paths must be
283        pre-encoded.
284        """
285        command = ("scp -rq %s -o StrictHostKeyChecking=no "
286                   "-o UserKnownHostsFile=%s -P %d %s '%s'")
287        return command % (self._master_ssh.ssh_option, self.known_hosts_file,
288                          self.port, sources, dest)
289
290
291    def _make_rsync_compatible_globs(self, path, is_local):
292        """
293        Given an rsync-style path, returns a list of globbed paths
294        that will hopefully provide equivalent behaviour for scp. Does not
295        support the full range of rsync pattern matching behaviour, only that
296        exposed in the get/send_file interface (trailing slashes).
297
298        The is_local param is flag indicating if the paths should be
299        interpreted as local or remote paths.
300        """
301
302        # non-trailing slash paths should just work
303        if len(path) == 0 or path[-1] != "/":
304            return [path]
305
306        # make a function to test if a pattern matches any files
307        if is_local:
308            def glob_matches_files(path, pattern):
309                return len(glob.glob(path + pattern)) > 0
310        else:
311            def glob_matches_files(path, pattern):
312                result = self.run("ls \"%s\"%s" % (utils.sh_escape(path),
313                                                   pattern),
314                                  stdout_tee=None, ignore_status=True)
315                return result.exit_status == 0
316
317        # take a set of globs that cover all files, and see which are needed
318        patterns = ["*", ".[!.]*"]
319        patterns = [p for p in patterns if glob_matches_files(path, p)]
320
321        # convert them into a set of paths suitable for the commandline
322        if is_local:
323            return ["\"%s\"%s" % (utils.sh_escape(path), pattern)
324                    for pattern in patterns]
325        else:
326            return [utils.scp_remote_escape(path) + pattern
327                    for pattern in patterns]
328
329
330    def _make_rsync_compatible_source(self, source, is_local):
331        """
332        Applies the same logic as _make_rsync_compatible_globs, but
333        applies it to an entire list of sources, producing a new list of
334        sources, properly quoted.
335        """
336        return sum((self._make_rsync_compatible_globs(path, is_local)
337                    for path in source), [])
338
339
340    def _set_umask_perms(self, dest):
341        """
342        Given a destination file/dir (recursively) set the permissions on
343        all the files and directories to the max allowed by running umask.
344        """
345
346        # now this looks strange but I haven't found a way in Python to _just_
347        # get the umask, apparently the only option is to try to set it
348        umask = os.umask(0)
349        os.umask(umask)
350
351        max_privs = 0777 & ~umask
352
353        def set_file_privs(filename):
354            """Sets mode of |filename|.  Assumes |filename| exists."""
355            file_stat = os.stat(filename)
356
357            file_privs = max_privs
358            # if the original file permissions do not have at least one
359            # executable bit then do not set it anywhere
360            if not file_stat.st_mode & 0111:
361                file_privs &= ~0111
362
363            os.chmod(filename, file_privs)
364
365        # try a bottom-up walk so changes on directory permissions won't cut
366        # our access to the files/directories inside it
367        for root, dirs, files in os.walk(dest, topdown=False):
368            # when setting the privileges we emulate the chmod "X" behaviour
369            # that sets to execute only if it is a directory or any of the
370            # owner/group/other already has execute right
371            for dirname in dirs:
372                os.chmod(os.path.join(root, dirname), max_privs)
373
374            # Filter out broken symlinks as we go.
375            for filename in filter(os.path.exists, files):
376                set_file_privs(os.path.join(root, filename))
377
378
379        # now set privs for the dest itself
380        if os.path.isdir(dest):
381            os.chmod(dest, max_privs)
382        else:
383            set_file_privs(dest)
384
385
386    def get_file(self, source, dest, delete_dest=False, preserve_perm=True,
387                 preserve_symlinks=False, retry=True, safe_symlinks=False):
388        """
389        Copy files from the remote host to a local path.
390
391        Directories will be copied recursively.
392        If a source component is a directory with a trailing slash,
393        the content of the directory will be copied, otherwise, the
394        directory itself and its content will be copied. This
395        behavior is similar to that of the program 'rsync'.
396
397        Args:
398                source: either
399                        1) a single file or directory, as a string
400                        2) a list of one or more (possibly mixed)
401                                files or directories
402                dest: a file or a directory (if source contains a
403                        directory or more than one element, you must
404                        supply a directory dest)
405                delete_dest: if this is true, the command will also clear
406                             out any old files at dest that are not in the
407                             source
408                preserve_perm: tells get_file() to try to preserve the sources
409                               permissions on files and dirs
410                preserve_symlinks: try to preserve symlinks instead of
411                                   transforming them into files/dirs on copy
412                safe_symlinks: same as preserve_symlinks, but discard links
413                               that may point outside the copied tree
414        Raises:
415                AutoservRunError: the scp command failed
416        """
417        logging.debug('get_file. source: %s, dest: %s, delete_dest: %s,'
418                      'preserve_perm: %s, preserve_symlinks:%s', source, dest,
419                      delete_dest, preserve_perm, preserve_symlinks)
420
421        # Start a master SSH connection if necessary.
422        self.start_master_ssh()
423
424        if isinstance(source, basestring):
425            source = [source]
426        dest = os.path.abspath(dest)
427
428        # If rsync is disabled or fails, try scp.
429        try_scp = True
430        if self.use_rsync():
431            logging.debug('Using Rsync.')
432            try:
433                remote_source = self._encode_remote_paths(source)
434                local_dest = utils.sh_escape(dest)
435                rsync = self._make_rsync_cmd(remote_source, local_dest,
436                                             delete_dest, preserve_symlinks,
437                                             safe_symlinks)
438                utils.run(rsync)
439                try_scp = False
440            except error.CmdError, e:
441                # retry on rsync exit values which may be caused by transient
442                # network problems:
443                #
444                # rc 10: Error in socket I/O
445                # rc 12: Error in rsync protocol data stream
446                # rc 23: Partial transfer due to error
447                # rc 255: Ssh error
448                #
449                # Note that rc 23 includes dangling symlinks.  In this case
450                # retrying is useless, but not very damaging since rsync checks
451                # for those before starting the transfer (scp does not).
452                status = e.result_obj.exit_status
453                if status in [10, 12, 23, 255] and retry:
454                    logging.warning('rsync status %d, retrying', status)
455                    self.get_file(source, dest, delete_dest, preserve_perm,
456                                  preserve_symlinks, retry=False)
457                    # The nested get_file() does all that's needed.
458                    return
459                else:
460                    logging.warning("trying scp, rsync failed: %s (%d)",
461                                     e, status)
462
463        if try_scp:
464            logging.debug('Trying scp.')
465            # scp has no equivalent to --delete, just drop the entire dest dir
466            if delete_dest and os.path.isdir(dest):
467                shutil.rmtree(dest)
468                os.mkdir(dest)
469
470            remote_source = self._make_rsync_compatible_source(source, False)
471            if remote_source:
472                # _make_rsync_compatible_source() already did the escaping
473                remote_source = self._encode_remote_paths(
474                        remote_source, escape=False, use_scp=True)
475                local_dest = utils.sh_escape(dest)
476                scp = self._make_scp_cmd(remote_source, local_dest)
477                try:
478                    utils.run(scp)
479                except error.CmdError, e:
480                    logging.debug('scp failed: %s', e)
481                    raise error.AutoservRunError(e.args[0], e.args[1])
482
483        if not preserve_perm:
484            # we have no way to tell scp to not try to preserve the
485            # permissions so set them after copy instead.
486            # for rsync we could use "--no-p --chmod=ugo=rwX" but those
487            # options are only in very recent rsync versions
488            self._set_umask_perms(dest)
489
490
491    def send_file(self, source, dest, delete_dest=False,
492                  preserve_symlinks=False, excludes=None):
493        """
494        Copy files from a local path to the remote host.
495
496        Directories will be copied recursively.
497        If a source component is a directory with a trailing slash,
498        the content of the directory will be copied, otherwise, the
499        directory itself and its content will be copied. This
500        behavior is similar to that of the program 'rsync'.
501
502        Args:
503                source: either
504                        1) a single file or directory, as a string
505                        2) a list of one or more (possibly mixed)
506                                files or directories
507                dest: a file or a directory (if source contains a
508                        directory or more than one element, you must
509                        supply a directory dest)
510                delete_dest: if this is true, the command will also clear
511                             out any old files at dest that are not in the
512                             source
513                preserve_symlinks: controls if symlinks on the source will be
514                    copied as such on the destination or transformed into the
515                    referenced file/directory
516                excludes: A list of file pattern that matches files not to be
517                          sent. `send_file` will fail if exclude is set, since
518                          local copy does not support --exclude, e.g., when
519                          using scp to copy file.
520
521        Raises:
522                AutoservRunError: the scp command failed
523        """
524        logging.debug('send_file. source: %s, dest: %s, delete_dest: %s,'
525                      'preserve_symlinks:%s', source, dest,
526                      delete_dest, preserve_symlinks)
527        # Start a master SSH connection if necessary.
528        self.start_master_ssh()
529
530        if isinstance(source, basestring):
531            source = [source]
532
533        local_sources = self._encode_local_paths(source)
534        if not local_sources:
535            raise error.TestError('source |%s| yielded an empty string' % (
536                source))
537        if local_sources.find('\x00') != -1:
538            raise error.TestError('one or more sources include NUL char')
539
540        # If rsync is disabled or fails, try scp.
541        try_scp = True
542        if self.use_rsync():
543            logging.debug('Using Rsync.')
544            remote_dest = self._encode_remote_paths([dest])
545            try:
546                rsync = self._make_rsync_cmd(local_sources, remote_dest,
547                                             delete_dest, preserve_symlinks,
548                                             False, excludes=excludes)
549                utils.run(rsync)
550                try_scp = False
551            except error.CmdError, e:
552                logging.warning("trying scp, rsync failed: %s", e)
553
554        if try_scp:
555            logging.debug('Trying scp.')
556            if excludes:
557                raise error.AutotestHostRunError(
558                        '--exclude is not supported in scp, try to use rsync. '
559                        'excludes: %s' % ','.join(excludes), None)
560            # scp has no equivalent to --delete, just drop the entire dest dir
561            if delete_dest:
562                is_dir = self.run("ls -d %s/" % dest,
563                                  ignore_status=True).exit_status == 0
564                if is_dir:
565                    cmd = "rm -rf %s && mkdir %s"
566                    cmd %= (dest, dest)
567                    self.run(cmd)
568
569            remote_dest = self._encode_remote_paths([dest], use_scp=True)
570            local_sources = self._make_rsync_compatible_source(source, True)
571            if local_sources:
572                sources = self._encode_local_paths(local_sources, escape=False)
573                scp = self._make_scp_cmd(sources, remote_dest)
574                try:
575                    utils.run(scp)
576                except error.CmdError, e:
577                    logging.debug('scp failed: %s', e)
578                    raise error.AutoservRunError(e.args[0], e.args[1])
579            else:
580                logging.debug('skipping scp for empty source list')
581
582
583    def verify_ssh_user_access(self):
584        """Verify ssh access to this host.
585
586        @returns False if ssh_ping fails due to Permissions error, True
587                 otherwise.
588        """
589        try:
590            self.ssh_ping()
591        except (error.AutoservSshPermissionDeniedError,
592                error.AutoservSshPingHostError):
593            return False
594        return True
595
596
597    def ssh_ping(self, timeout=60, connect_timeout=None, base_cmd='true'):
598        """
599        Pings remote host via ssh.
600
601        @param timeout: Command execution timeout in seconds.
602                        Defaults to 60 seconds.
603        @param connect_timeout: ssh connection timeout in seconds.
604        @param base_cmd: The base command to run with the ssh ping.
605                         Defaults to true.
606        @raise AutoservSSHTimeout: If the ssh ping times out.
607        @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
608                                                 permissions.
609        @raise AutoservSshPingHostError: For other AutoservRunErrors.
610        """
611        ctimeout = min(timeout, connect_timeout or timeout)
612        try:
613            self.run(base_cmd, timeout=timeout, connect_timeout=ctimeout,
614                     ssh_failure_retry_ok=True)
615        except error.AutoservSSHTimeout:
616            msg = "Host (ssh) verify timed out (timeout = %d)" % timeout
617            raise error.AutoservSSHTimeout(msg)
618        except error.AutoservSshPermissionDeniedError:
619            #let AutoservSshPermissionDeniedError be visible to the callers
620            raise
621        except error.AutoservRunError, e:
622            # convert the generic AutoservRunError into something more
623            # specific for this context
624            raise error.AutoservSshPingHostError(e.description + '\n' +
625                                                 repr(e.result_obj))
626
627
628    def is_up(self, timeout=60, connect_timeout=None, base_cmd='true'):
629        """
630        Check if the remote host is up by ssh-ing and running a base command.
631
632        @param timeout: command execution timeout in seconds.
633        @param connect_timeout: ssh connection timeout in seconds.
634        @param base_cmd: a base command to run with ssh. The default is 'true'.
635        @returns True if the remote host is up before the timeout expires,
636                 False otherwise.
637        """
638        try:
639            self.ssh_ping(timeout=timeout,
640                          connect_timeout=connect_timeout,
641                          base_cmd=base_cmd)
642        except error.AutoservError:
643            return False
644        else:
645            return True
646
647
648    def is_up_fast(self):
649        """Return True if the host can be pinged."""
650        ping_config = ping_runner.PingConfig(
651                self.hostname, count=3, ignore_result=True, ignore_status=True)
652        return ping_runner.PingRunner().ping(ping_config).received > 0
653
654
655    def wait_up(self, timeout=_DEFAULT_WAIT_UP_TIME_SECONDS):
656        """
657        Wait until the remote host is up or the timeout expires.
658
659        In fact, it will wait until an ssh connection to the remote
660        host can be established, and getty is running.
661
662        @param timeout time limit in seconds before returning even
663            if the host is not up.
664
665        @returns True if the host was found to be up before the timeout expires,
666                 False otherwise
667        """
668        current_time = int(time.time())
669        end_time = current_time + timeout
670
671        autoserv_error_logged = False
672        while current_time < end_time:
673            ping_timeout = min(_DEFAULT_MAX_PING_TIMEOUT,
674                               end_time - current_time)
675            if self.is_up(timeout=ping_timeout, connect_timeout=ping_timeout):
676                try:
677                    if self.are_wait_up_processes_up():
678                        logging.debug('Host %s is now up', self.host_port)
679                        return True
680                except error.AutoservError as e:
681                    if not autoserv_error_logged:
682                        logging.debug('Ignoring failure to reach %s: %s %s',
683                                      self.host_port, e,
684                                      '(and further similar failures)')
685                        autoserv_error_logged = True
686            time.sleep(1)
687            current_time = int(time.time())
688
689        logging.debug('Host %s is still down after waiting %d seconds',
690                      self.host_port, int(timeout + time.time() - end_time))
691        return False
692
693
694    def wait_down(self, timeout=_DEFAULT_WAIT_DOWN_TIME_SECONDS,
695                  warning_timer=None, old_boot_id=None,
696                  max_ping_timeout=_DEFAULT_MAX_PING_TIMEOUT):
697        """
698        Wait until the remote host is down or the timeout expires.
699
700        If old_boot_id is provided, waits until either the machine is
701        unpingable or self.get_boot_id() returns a value different from
702        old_boot_id. If the boot_id value has changed then the function
703        returns True under the assumption that the machine has shut down
704        and has now already come back up.
705
706        If old_boot_id is None then until the machine becomes unreachable the
707        method assumes the machine has not yet shut down.
708
709        @param timeout Time limit in seconds before returning even if the host
710            is still up.
711        @param warning_timer Time limit in seconds that will generate a warning
712            if the host is not down yet. Can be None for no warning.
713        @param old_boot_id A string containing the result of self.get_boot_id()
714            prior to the host being told to shut down. Can be None if this is
715            not available.
716        @param max_ping_timeout Maximum timeout in seconds for each
717            self.get_boot_id() call. If this timeout is hit, it is assumed that
718            the host went down and became unreachable.
719
720        @returns True if the host was found to be down (max_ping_timeout timeout
721            expired or boot_id changed if provided) and False if timeout
722            expired.
723        """
724        #TODO: there is currently no way to distinguish between knowing
725        #TODO: boot_id was unsupported and not knowing the boot_id.
726        current_time = int(time.time())
727        end_time = current_time + timeout
728
729        if warning_timer:
730            warn_time = current_time + warning_timer
731
732        if old_boot_id is not None:
733            logging.debug('Host %s pre-shutdown boot_id is %s',
734                          self.host_port, old_boot_id)
735
736        # Impose semi real-time deadline constraints, since some clients
737        # (eg: watchdog timer tests) expect strict checking of time elapsed.
738        # Each iteration of this loop is treated as though it atomically
739        # completes within current_time, this is needed because if we used
740        # inline time.time() calls instead then the following could happen:
741        #
742        # while time.time() < end_time:                     [23 < 30]
743        #    some code.                                     [takes 10 secs]
744        #    try:
745        #        new_boot_id = self.get_boot_id(timeout=end_time - time.time())
746        #                                                   [30 - 33]
747        # The last step will lead to a return True, when in fact the machine
748        # went down at 32 seconds (>30). Hence we need to pass get_boot_id
749        # the same time that allowed us into that iteration of the loop.
750        while current_time < end_time:
751            ping_timeout = min(end_time - current_time, max_ping_timeout)
752            try:
753                new_boot_id = self.get_boot_id(timeout=ping_timeout)
754            except error.AutoservError:
755                logging.debug('Host %s is now unreachable over ssh, is down',
756                              self.host_port)
757                return True
758            else:
759                # if the machine is up but the boot_id value has changed from
760                # old boot id, then we can assume the machine has gone down
761                # and then already come back up
762                if old_boot_id is not None and old_boot_id != new_boot_id:
763                    logging.debug('Host %s now has boot_id %s and so must '
764                                  'have rebooted', self.host_port, new_boot_id)
765                    return True
766
767            if warning_timer and current_time > warn_time:
768                self.record("INFO", None, "shutdown",
769                            "Shutdown took longer than %ds" % warning_timer)
770                # Print the warning only once.
771                warning_timer = None
772                # If a machine is stuck switching runlevels
773                # This may cause the machine to reboot.
774                self.run('kill -HUP 1', ignore_status=True)
775
776            time.sleep(1)
777            current_time = int(time.time())
778
779        return False
780
781
782    # tunable constants for the verify & repair code
783    AUTOTEST_GB_DISKSPACE_REQUIRED = get_value("SERVER",
784                                               "gb_diskspace_required",
785                                               type=float,
786                                               default=20.0)
787
788
789    def verify_connectivity(self):
790        super(AbstractSSHHost, self).verify_connectivity()
791
792        logging.info('Pinging host ' + self.host_port)
793        self.ssh_ping()
794        logging.info("Host (ssh) %s is alive", self.host_port)
795
796        if self.is_shutting_down():
797            raise error.AutoservHostIsShuttingDownError("Host is shutting down")
798
799
800    def verify_software(self):
801        super(AbstractSSHHost, self).verify_software()
802        try:
803            self.check_diskspace(autotest.Autotest.get_install_dir(self),
804                                 self.AUTOTEST_GB_DISKSPACE_REQUIRED)
805        except error.AutoservDiskFullHostError:
806            # only want to raise if it's a space issue
807            raise
808        except (error.AutoservHostError, autotest.AutodirNotFoundError):
809            logging.exception('autodir space check exception, this is probably '
810                             'safe to ignore\n')
811
812
813    def close(self):
814        super(AbstractSSHHost, self).close()
815        self.rpc_server_tracker.disconnect_all()
816        if not self._connection_pool:
817            self._master_ssh.close()
818        if os.path.exists(self.known_hosts_file):
819            os.remove(self.known_hosts_file)
820
821
822    def restart_master_ssh(self):
823        """
824        Stop and restart the ssh master connection.  This is meant as a last
825        resort when ssh commands fail and we don't understand why.
826        """
827        logging.debug('Restarting master ssh connection')
828        self._master_ssh.close()
829        self._master_ssh.maybe_start(timeout=30)
830
831
832
833    def start_master_ssh(self, timeout=DEFAULT_START_MASTER_SSH_TIMEOUT_S):
834        """
835        Called whenever a slave SSH connection needs to be initiated (e.g., by
836        run, rsync, scp). If master SSH support is enabled and a master SSH
837        connection is not active already, start a new one in the background.
838        Also, cleanup any zombie master SSH connections (e.g., dead due to
839        reboot).
840
841        timeout: timeout in seconds (default 5) to wait for master ssh
842                 connection to be established. If timeout is reached, a
843                 warning message is logged, but no other action is taken.
844        """
845        if not enable_master_ssh:
846            return
847        self._master_ssh.maybe_start(timeout=timeout)
848
849
850    def clear_known_hosts(self):
851        """Clears out the temporary ssh known_hosts file.
852
853        This is useful if the test SSHes to the machine, then reinstalls it,
854        then SSHes to it again.  It can be called after the reinstall to
855        reduce the spam in the logs.
856        """
857        logging.info("Clearing known hosts for host '%s', file '%s'.",
858                     self.host_port, self.known_hosts_file)
859        # Clear out the file by opening it for writing and then closing.
860        fh = open(self.known_hosts_file, "w")
861        fh.close()
862
863
864    def collect_logs(self, remote_src_dir, local_dest_dir, ignore_errors=True):
865        """Copy log directories from a host to a local directory.
866
867        @param remote_src_dir: A destination directory on the host.
868        @param local_dest_dir: A path to a local destination directory.
869            If it doesn't exist it will be created.
870        @param ignore_errors: If True, ignore exceptions.
871
872        @raises OSError: If there were problems creating the local_dest_dir and
873            ignore_errors is False.
874        @raises AutoservRunError, AutotestRunError: If something goes wrong
875            while copying the directories and ignore_errors is False.
876        """
877        if not self.check_cached_up_status():
878            logging.warning('Host %s did not answer to ping, skip collecting '
879                            'logs.', self.host_port)
880            return
881
882        locally_created_dest = False
883        if (not os.path.exists(local_dest_dir)
884                or not os.path.isdir(local_dest_dir)):
885            try:
886                os.makedirs(local_dest_dir)
887                locally_created_dest = True
888            except OSError as e:
889                logging.warning('Unable to collect logs from host '
890                                '%s: %s', self.host_port, e)
891                if not ignore_errors:
892                    raise
893                return
894
895        # Build test result directory summary
896        try:
897            result_tools_runner.run_on_client(self, remote_src_dir)
898        except (error.AutotestRunError, error.AutoservRunError,
899                error.AutoservSSHTimeout) as e:
900            logging.exception(
901                    'Non-critical failure: Failed to collect and throttle '
902                    'results at %s from host %s', remote_src_dir,
903                    self.host_port)
904
905        try:
906            self.get_file(remote_src_dir, local_dest_dir, safe_symlinks=True)
907        except (error.AutotestRunError, error.AutoservRunError,
908                error.AutoservSSHTimeout) as e:
909            logging.warning('Collection of %s to local dir %s from host %s '
910                            'failed: %s', remote_src_dir, local_dest_dir,
911                            self.host_port, e)
912            if locally_created_dest:
913                shutil.rmtree(local_dest_dir, ignore_errors=ignore_errors)
914            if not ignore_errors:
915                raise
916
917        # Clean up directory summary file on the client side.
918        try:
919            result_tools_runner.run_on_client(self, remote_src_dir,
920                                              cleanup_only=True)
921        except (error.AutotestRunError, error.AutoservRunError,
922                error.AutoservSSHTimeout) as e:
923            logging.exception(
924                    'Non-critical failure: Failed to cleanup result summary '
925                    'files at %s in host %s', remote_src_dir, self.hostname)
926
927
928    def create_ssh_tunnel(self, port, local_port):
929        """Create an ssh tunnel from local_port to port.
930
931        This is used to forward a port securely through a tunnel process from
932        the server to the DUT for RPC server connection.
933
934        @param port: remote port on the host.
935        @param local_port: local forwarding port.
936
937        @return: the tunnel process.
938        """
939        tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
940        ssh_cmd = self.make_ssh_command(opts=tunnel_options, port=self.port)
941        tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
942        logging.debug('Full tunnel command: %s', tunnel_cmd)
943        # Exec the ssh process directly here rather than using a shell.
944        # Using a shell leaves a dangling ssh process, because we deliver
945        # signals to the shell wrapping ssh, not the ssh process itself.
946        args = shlex.split(tunnel_cmd)
947        tunnel_proc = subprocess.Popen(args, close_fds=True)
948        logging.debug('Started ssh tunnel, local = %d'
949                      ' remote = %d, pid = %d',
950                      local_port, port, tunnel_proc.pid)
951        return tunnel_proc
952
953
954    def disconnect_ssh_tunnel(self, tunnel_proc, port):
955        """
956        Disconnects a previously forwarded port from the server to the DUT for
957        RPC server connection.
958
959        @param tunnel_proc: a tunnel process returned from |create_ssh_tunnel|.
960        @param port: remote port on the DUT, used in ADBHost.
961
962        """
963        if tunnel_proc.poll() is None:
964            tunnel_proc.terminate()
965            logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
966        else:
967            logging.debug('Tunnel pid %d terminated early, status %d',
968                          tunnel_proc.pid, tunnel_proc.returncode)
969
970
971    def get_os_type(self):
972        """Returns the host OS descriptor (to be implemented in subclasses).
973
974        @return A string describing the OS type.
975        """
976        raise NotImplementedError
977
978
979    def check_cached_up_status(
980            self, expiration_seconds=_DEFAULT_UP_STATUS_EXPIRATION_SECONDS):
981        """Check if the DUT responded to ping in the past `expiration_seconds`.
982
983        @param expiration_seconds: The number of seconds to keep the cached
984                status of whether the DUT responded to ping.
985        @return: True if the DUT has responded to ping during the past
986                 `expiration_seconds`.
987        """
988        # Refresh the up status if any of following conditions is true:
989        # * cached status is never set
990        # * cached status is False, so the method can check if the host is up
991        #   again.
992        # * If the cached status is older than `expiration_seconds`
993        expire_time = time.time() - expiration_seconds
994        if (self._cached_up_status_updated is None or
995                not self._cached_up_status or
996                self._cached_up_status_updated < expire_time):
997            self._cached_up_status = self.is_up_fast()
998            self._cached_up_status_updated = time.time()
999        return self._cached_up_status
1000