1#
2# Copyright 2007 Google Inc. Released under the GPL v2
3
4"""
5This module defines the SSHHost class.
6
7Implementation details:
8You should import the "hosts" package instead of importing each type of host.
9
10        SSHHost: a remote machine with a ssh access
11"""
12
13import inspect
14import logging
15import re
16from autotest_lib.client.common_lib import error
17from autotest_lib.client.common_lib import pxssh
18from autotest_lib.server import utils
19from autotest_lib.server.hosts import abstract_ssh
20
# In case cros_host is being run via SSP on an older Moblab version with an
# older chromite version.
23try:
24    from chromite.lib import metrics
25except ImportError:
26    metrics = utils.metrics_mock
27
28
class SSHHost(abstract_ssh.AbstractSSHHost):
    """
    This class represents a remote machine controlled through an ssh
    session on which you can run programs.

    It is not the machine autoserv is running on. The machine must be
    configured for password-less login, for example through public key
    authentication.

    It includes support for controlling the machine through a serial
    console on which you can run programs. If such a serial console is
    set up on the machine then capabilities such as hard reset and
    boot strap monitoring are available. If the machine does not have a
    serial console available then ordinary SSH-based commands will
    still be available, but attempts to use extensions such as
    console logging or hard reset will fail silently.

    Implementation details:
    This is a leaf class in an abstract class hierarchy, it must
    implement the unimplemented methods in parent classes.
    """

    def _initialize(self, hostname, *args, **dargs):
        """
        Construct a SSHHost object.

        After the parent class has been initialized, setup_ssh() is called
        so that key-based login is attempted for password-configured hosts.

        @param hostname: network hostname or address of remote machine
        @param args: extra positional arguments, forwarded unchanged to the
                parent class _initialize().
        @param dargs: extra keyword arguments, forwarded unchanged to the
                parent class _initialize().
        """
        super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs)
        self.setup_ssh()


    def ssh_command(self, connect_timeout=30, options='', alive_interval=300):
        """
        Construct an ssh command with proper args for this host.

        @param connect_timeout: connection timeout (in seconds)
        @param options: SSH options
        @param alive_interval: SSH Alive interval.

        @return The command string built by make_ssh_command() followed by
                this host's hostname.
        """
        # Always append the master-connection option to the caller's options.
        options = "%s %s" % (options, self.master_ssh_option)
        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                         opts=options,
                                         hosts_file=self.known_hosts_file,
                                         connect_timeout=connect_timeout,
                                         alive_interval=alive_interval)
        return "%s %s" % (base_cmd, self.hostname)


    def _verbose_logger_command(self, command):
        """
        Prepend the command for the client with information about the ssh command
        to be executed and the server stack state.

        @param command: the ssh command to be executed.

        @return A shell snippet that first reports |command| and the names of
                up to three server-side calling functions through "logger"
                (when that executable exists on the remote side) and then
                runs |command| itself.
        """
        # Only function names are needed, so ask for zero lines of source
        # context; this avoids reading source files for every frame.
        stack_frames = inspect.stack(0)
        try:
            # Frames 0 and 1 (this helper and its immediate caller) are
            # boring; report at most the next three.  Index 3 of a frame
            # record is the function name.  A generator expression is used
            # so that no loop variable holding a frame outlives this block.
            names = list(record[3] for record in stack_frames[2:5])
        finally:
            # Drop the frame references even on error so that no reference
            # cycle through the current frame is left behind.
            del stack_frames
        # Outermost caller first, innermost last.
        stack = '|'.join(reversed(names))
        # If "logger" executable exists on the DUT use it to respew |command|.
        # Then regardless of "logger" run |command| as usual.
        # NOTE(review): logger(1) documents the tag option as "-t <tag>";
        # "-tag" presumably parses as "-t ag".  Left unchanged because log
        # consumers may depend on the current output - confirm before fixing.
        command = ('if type "logger" > /dev/null 2>&1; then'
                   ' logger -tag "autotest" "server[stack::%s] -> ssh_run(%s)";'
                   'fi; '
                   '%s' % (stack, utils.sh_escape(command), command))
        return command


    def _run(self, command, timeout, ignore_status,
             stdout, stderr, connect_timeout, env, options, stdin, args,
             ignore_timeout, ssh_failure_retry_ok):
        """Helper function for run().

        Builds the full ssh command line, executes it through utils.run()
        with the retry policy described below, records metrics for every
        ssh invocation and for the overall call, and maps failures to
        autoserv exceptions.

        @param command: the command line string to run remotely.
        @param timeout: command execution timeout, passed to utils.run().
        @param ignore_status: when True, a non-zero exit status does not
                raise AutoservRunError.
        @param stdout: stdout destination, passed to utils.run().
        @param stderr: stderr destination, passed to utils.run().
        @param connect_timeout: ssh connection timeout (in seconds).
        @param env: string of environment assignments; when not blank it is
                exported in front of the command.
        @param options: string with additional ssh command options.
        @param stdin: stdin, passed to utils.run().
        @param args: iterable of extra arguments; each one is shell-escaped,
                double-quoted and appended to |command|.
        @param ignore_timeout: when True, a command timeout makes this
                method return None instead of raising.
        @param ssh_failure_retry_ok: when True, retry up to twice on
                timeout or on any failure other than a DNS failure.

        @return The result object from utils.run(), or None on an ignored
                timeout.

        @raises AutoservSSHTimeout: ssh reported a connection timeout.
        @raises AutoservSshPermissionDeniedError: ssh reported
                "Permission denied.".
        @raises AutoservRunError: the exit status was non-zero and
                ignore_status is False.
        """
        ssh_cmd = self.ssh_command(connect_timeout, options)
        if not env.strip():
            env = ""
        else:
            env = "export %s;" % env
        for arg in args:
            command += ' "%s"' % utils.sh_escape(arg)
        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))

        # TODO(jrbarnette):  crbug.com/484726 - When we're in an SSP
        # container, sometimes shortly after reboot we will see DNS
        # resolution errors on ssh commands; the problem never
        # occurs more than once in a row.  This especially affects
        # the autoupdate_Rollback test, but other cases have been
        # affected, too.
        #
        # We work around it by detecting the first DNS resolution error
        # and retrying exactly one time.
        dns_error_retry_count = 1

        def counters_inc(counter_name, failure_name):
            """Helper function to increment metrics counters.
            @param counter_name: string indicating which counter to use
            @param failure_name: string identifying an error, or 'success'
            """
            # ssh_call_count is read from the enclosing scope at call time,
            # so it reflects the number of attempts made so far.
            if counter_name == 'call':
                # ssh_counter records the outcome of each ssh invocation
                # inside _run(), including exceptions.
                ssh_counter = metrics.Counter('chromeos/autotest/ssh/calls')
                fields = {'error' : failure_name or 'success',
                          'attempt' : ssh_call_count}
                ssh_counter.increment(fields=fields)

            if counter_name == 'run':
                # run_counter records each call to _run() with its result
                # and how many tries were made.  Calls are recorded when
                # _run() exits (including exiting with an exception)
                run_counter = metrics.Counter('chromeos/autotest/ssh/runs')
                fields = {'error' : failure_name or 'success',
                          'attempt' : ssh_call_count}
                run_counter.increment(fields=fields)

        # If ssh_failure_retry_ok is True, retry twice on timeouts and generic
        # error 255: if a simple retry doesn't work, kill the ssh master
        # connection and try again.  (Note that either error could come from
        # the command running in the DUT, in which case the retry may be
        # useless but, in theory, also harmless.)
        if ssh_failure_retry_ok:
            # Ignore ssh command timeout, even though it could be a timeout due
            # to the command executing in the remote host.  Note that passing
            # ignore_timeout = True makes utils.run() return None on timeouts
            # (and only on timeouts).
            original_ignore_timeout = ignore_timeout
            ignore_timeout = True
            ssh_failure_retry_count = 2
        else:
            ssh_failure_retry_count = 0

        ssh_call_count = 0

        while True:
            try:
                # Increment call count first, in case utils.run() throws an
                # exception.
                ssh_call_count += 1
                result = utils.run(full_cmd, timeout, True, stdout, stderr,
                                   verbose=False, stdin=stdin,
                                   stderr_is_expected=ignore_status,
                                   ignore_timeout=ignore_timeout)
            except Exception:
                # No retries on exception.
                counters_inc('call', 'exception')
                counters_inc('run', 'exception')
                # Bare raise keeps the original traceback intact.
                raise

            failure_name = None

            if result:
                if result.exit_status == 255:
                    if re.search(r'^ssh: .*: Name or service not known',
                                 result.stderr):
                        failure_name = 'dns_failure'
                    else:
                        failure_name = 'error_255'
                elif result.exit_status > 0:
                    failure_name = 'nonzero_status'
            else:
                # result == None
                failure_name = 'timeout'

            # Record the outcome of the ssh invocation.
            counters_inc('call', failure_name)

            if failure_name:
                # There was a failure: decide whether to retry.
                if failure_name == 'dns_failure':
                    if dns_error_retry_count > 0:
                        logging.debug('retrying ssh because of DNS failure')
                        dns_error_retry_count -= 1
                        continue
                else:
                    if ssh_failure_retry_count == 2:
                        logging.debug('retrying ssh command after %s',
                                       failure_name)
                        ssh_failure_retry_count -= 1
                        continue
                    elif ssh_failure_retry_count == 1:
                        # After two failures, restart the master connection
                        # before the final try.
                        logging.debug('retry 2: restarting master connection')
                        self.restart_master_ssh()
                        # Last retry: reinstate timeout behavior.
                        ignore_timeout = original_ignore_timeout
                        ssh_failure_retry_count -= 1
                        continue

            # No retry conditions occurred.  Exit the loop.
            break

        # The outcomes of ssh invocations have been recorded.  Now record
        # the outcome of this function.

        if ignore_timeout and not result:
            counters_inc('run', 'ignored_timeout')
            return None

        # ssh error messages show up in band (indistinguishable from stuff
        # sent through the SSH connection), so stderr is only inspected for
        # the two connection-level errors below, and only when the exit
        # status is 255.
        if result.exit_status == 255:
            if re.search(r'^ssh: connect to host .* port .*: '
                         r'Connection timed out\r$', result.stderr):
                counters_inc('run', 'final_timeout')
                raise error.AutoservSSHTimeout("ssh timed out", result)
            if "Permission denied." in result.stderr:
                msg = "ssh permission denied"
                counters_inc('run', 'final_eperm')
                raise error.AutoservSshPermissionDeniedError(msg, result)

        if not ignore_status and result.exit_status > 0:
            counters_inc('run', 'final_run_error')
            raise error.AutoservRunError("command execution error", result)

        # failure_name is None on success; it may also be a tolerated
        # failure (e.g. non-zero status with ignore_status set).
        counters_inc('run', failure_name)
        return result


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=(),
            ignore_timeout=False, ssh_failure_retry_ok=False):
        """
        Run a command on the remote host.
        @see common_lib.hosts.host.run()

        @param command: the command line string
        @param timeout: command execution timeout
        @param ignore_status: do not raise an exception on a non-zero exit
                status of the command.
        @param stdout_tee: where to tee stdout, passed through to utils.run()
        @param stderr_tee: where to tee stderr, passed through to utils.run()
        @param connect_timeout: ssh connection timeout (in seconds)
        @param options: string with additional ssh command options
        @param stdin: stdin to pass to the executed command
        @param verbose: log the commands, both locally and (when "logger"
                is available) on the remote host
        @param args: sequence of strings appended to the command, each one
                shell-escaped and double-quoted
        @param ignore_timeout: bool True if SSH command timeouts should be
                ignored.  Will return None on command timeout.
        @param ssh_failure_retry_ok: True if the command may be retried on
                probable ssh failure (error 255 or timeout).  When true,
                the command may be executed up to three times, the third
                time after restarting the ssh master connection.  Use only for
                commands that are idempotent, because when a "probable
                ssh failure" occurs, we cannot tell if the command executed
                or not.

        @return The result object of the command, or None on an ignored
                timeout.

        @raises AutoservRunError: if the command failed or timed out
        @raises AutoservSSHTimeout: ssh connection has timed out
        @raises AutoservSshPermissionDeniedError: ssh permission was denied
        """
        if verbose:
            logging.debug("Running (ssh) '%s'", command)
            command = self._verbose_logger_command(command)

        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        # Render the host environment as "NAME=value NAME2=value2".
        env = " ".join("=".join(pair) for pair in self.env.items())
        try:
            return self._run(command, timeout, ignore_status,
                             stdout_tee, stderr_tee, connect_timeout, env,
                             options, stdin, args, ignore_timeout,
                             ssh_failure_retry_ok)
        except error.CmdError as cmderr:
            # We get a CmdError here only if there is timeout of that command.
            # Catch that and stuff it into AutoservRunError and raise it.
            timeout_message = str('Timeout encountered: %s' % cmderr.args[0])
            raise error.AutoservRunError(timeout_message, cmderr.args[1])


    def run_background(self, command, verbose=True):
        """Start a command on the host in the background.

        The command is started on the host in the background, and
        this method call returns immediately without waiting for the
        command's completion.  The PID of the process on the host is
        returned as a string.

        The command may redirect its stdin, stdout, or stderr as
        necessary.  Without redirection, all input and output will
        use /dev/null.

        @param command The command to run in the background
        @param verbose As for `self.run()`

        @return Returns the PID of the remote background process
                as a string.
        """
        # Redirection here isn't merely hygienic; it's a functional
        # requirement.  sshd won't terminate until stdin, stdout,
        # and stderr are all closed.
        #
        # The subshell is needed to do the right thing in case the
        # passed in command has its own I/O redirections.
        cmd_fmt = '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!'
        return self.run(cmd_fmt % command, verbose=verbose).stdout


    def run_short(self, command, **kwargs):
        """
        Calls the run() command with a short default timeout.

        Takes the same arguments as does run(),
        with the exception of the timeout argument which
        here is fixed at 60 seconds.
        It returns the result of run.

        @param command: the command line string
        @param kwargs: keyword arguments forwarded to run(); passing
                'timeout' here raises TypeError because it is already set.

        """
        return self.run(command, timeout=60, **kwargs)


    def run_grep(self, command, timeout=30, ignore_status=False,
                 stdout_ok_regexp=None, stdout_err_regexp=None,
                 stderr_ok_regexp=None, stderr_err_regexp=None,
                 connect_timeout=30):
        """
        Run a command on the remote host and look for regexp
        in stdout or stderr to determine if the command was
        successful or not.

        The checks are applied in this order:
        1. an error pattern found in stderr, then stdout, raises;
        2. an OK pattern found in stderr, then stdout, returns
           successfully, whatever the exit status;
        3. otherwise the exit status decides, unless ignore_status is set.
        A pattern is skipped when it is not given or when the stream it
        applies to is empty.

        @param command: the command line string
        @param timeout: time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: do not raise an exception because of the
                              exit code of the command (error patterns
                              still raise).
        @param stdout_ok_regexp: regexp that should be in stdout
                                 if the command was successful.
        @param stdout_err_regexp: regexp that should be in stdout
                                  if the command failed.
        @param stderr_ok_regexp: regexp that should be in stderr
                                 if the command was successful.
        @param stderr_err_regexp: regexp that should be in stderr
                                 if the command failed.
        @param connect_timeout: connection timeout (in seconds)

        Returns:
                None if the command was successful, raises an exception
                otherwise.

        Raises:
                AutoservRunError:
                - If stderr_err_regexp is found in stderr,
                - If stdout_err_regexp is found in stdout,
                - If no OK pattern matched, the exit code of the command
                  execution was not 0 and ignore_status is False.
        """

        # We ignore the status, because we will handle it at the end.
        result = self.run(command, timeout, ignore_status=True,
                          connect_timeout=connect_timeout)

        # Look for the patterns, in order
        for (regexp, stream) in ((stderr_err_regexp, result.stderr),
                                 (stdout_err_regexp, result.stdout)):
            if regexp and stream and re.search(regexp, stream):
                raise error.AutoservRunError(
                    '%s failed, found error pattern: "%s"' % (command,
                                                              regexp), result)

        for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
                                 (stdout_ok_regexp, result.stdout)):
            if regexp and stream and re.search(regexp, stream):
                # An OK pattern overrides the exit status check below.
                return

        if not ignore_status and result.exit_status > 0:
            raise error.AutoservRunError("command execution error", result)


    def setup_ssh_key(self):
        """Setup SSH Key

        Logs in to the host with the configured password through pxssh and
        appends the public key returned by utils.get_public_key() to
        ~/.ssh/authorized_keys on the host.

        This is best effort: any error is logged at debug level and not
        propagated to the caller.
        """
        logging.debug('Performing SSH key setup on %s:%d as %s.',
                      self.hostname, self.port, self.user)

        # Bound up front so the error path can tell whether a session object
        # was ever created.
        host = None
        try:
            host = pxssh.pxssh()
            host.login(self.hostname, self.user, self.password,
                        port=self.port)
            public_key = utils.get_public_key()

            host.sendline('mkdir -p ~/.ssh')
            host.prompt()
            host.sendline('chmod 700 ~/.ssh')
            host.prompt()
            host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " %
                            public_key)
            host.prompt()
            host.sendline('chmod 600 ~/.ssh/authorized_keys')
            host.prompt()
            host.logout()

            logging.debug('SSH key setup complete.')

        except Exception:
            # Catch Exception rather than everything so that
            # KeyboardInterrupt and SystemExit still propagate; include the
            # traceback so the cause of the failure is not lost.
            logging.debug('SSH key setup has failed.', exc_info=True)
            if host is not None:
                try:
                    host.logout()
                except Exception:
                    # Cleanup is best effort; the session may never have
                    # been established.
                    pass


    def setup_ssh(self):
        """Setup SSH

        When a password is configured for this host and ssh_ping() fails
        with AutoservSshPingHostError, install our public key on the host
        via setup_ssh_key().  Without a password nothing is done.
        """
        if self.password:
            try:
                self.ssh_ping()
            except error.AutoservSshPingHostError:
                self.setup_ssh_key()