1# Copyright 2007 Google Inc. Released under the GPL v2
2#pylint: disable-msg=C0111
3
4import re, os, sys, traceback, time, glob, tempfile
5import logging
6
7import common
8from autotest_lib.server import installable_object, prebuild, utils
9from autotest_lib.client.common_lib import base_job, error, autotemp
10from autotest_lib.client.common_lib import packages
11from autotest_lib.client.common_lib import global_config
12from autotest_lib.client.common_lib import utils as client_utils
13
14try:
15    from chromite.lib import metrics
16except ImportError:
17    metrics = client_utils.metrics_mock
18
19
20AUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client'
21AUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client'
22
23
24get_value = global_config.global_config.get_config_value
25autoserv_prebuild = get_value('AUTOSERV', 'enable_server_prebuild',
26                              type=bool, default=False)
27
28
29class AutodirNotFoundError(Exception):
30    """No Autotest installation could be found."""
31
32
33class AutotestFailure(Exception):
34    """Gereric exception class for failures during a test run."""
35
36
37class AutotestAbort(AutotestFailure):
38    """
39    AutotestAborts are thrown when the DUT seems fine,
40    and the test doesn't give us an explicit reason for
41    failure; In this case we have no choice but to abort.
42    """
43
44
45class AutotestDeviceError(AutotestFailure):
46    """
47    Exceptions that inherit from AutotestDeviceError
48    are thrown when we can determine the current
49    state of the DUT and conclude that it probably
50    lead to the test failing; these exceptions lead
51    to failures instead of aborts.
52    """
53
54
55class AutotestDeviceNotPingable(AutotestDeviceError):
56    """Error for when a DUT becomes unpingable."""
57
58
59class AutotestDeviceNotSSHable(AutotestDeviceError):
60    """Error for when a DUT is pingable but not SSHable."""
61
62
63class AutotestDeviceRebooted(AutotestDeviceError):
64    """Error for when a DUT rebooted unexpectedly."""
65
66
67class BaseAutotest(installable_object.InstallableObject):
68    """
69    This class represents the Autotest program.
70
71    Autotest is used to run tests automatically and collect the results.
72    It also supports profilers.
73
74    Implementation details:
75    This is a leaf class in an abstract class hierarchy, it must
76    implement the unimplemented methods in parent classes.
77    """
78
79    def __init__(self, host=None):
80        self.host = host
81        self.got = False
82        self.installed = False
83        self.serverdir = utils.get_server_dir()
84        super(BaseAutotest, self).__init__()
85
86
87    install_in_tmpdir = False
88    @classmethod
89    def set_install_in_tmpdir(cls, flag):
90        """ Sets a flag that controls whether or not Autotest should by
91        default be installed in a "standard" directory (e.g.
92        /home/autotest, /usr/local/autotest) or a temporary directory. """
93        cls.install_in_tmpdir = flag
94
95
96    @classmethod
97    def get_client_autodir_paths(cls, host):
98        return global_config.global_config.get_config_value(
99                'AUTOSERV', 'client_autodir_paths', type=list)
100
101
102    @classmethod
103    def get_installed_autodir(cls, host):
104        """
105        Find where the Autotest client is installed on the host.
106        @returns an absolute path to an installed Autotest client root.
107        @raises AutodirNotFoundError if no Autotest installation can be found.
108        """
109        autodir = host.get_autodir()
110        if autodir:
111            logging.debug('Using existing host autodir: %s', autodir)
112            return autodir
113
114        for path in Autotest.get_client_autodir_paths(host):
115            try:
116                autotest_binary = os.path.join(path, 'bin', 'autotest')
117                host.run('test -x %s' % utils.sh_escape(autotest_binary))
118                host.run('test -w %s' % utils.sh_escape(path))
119                logging.debug('Found existing autodir at %s', path)
120                return path
121            except error.GenericHostRunError:
122                logging.debug('%s does not exist on %s', autotest_binary,
123                              host.hostname)
124        raise AutodirNotFoundError
125
126
127    @classmethod
128    def get_install_dir(cls, host):
129        """
130        Determines the location where autotest should be installed on
131        host. If self.install_in_tmpdir is set, it will return a unique
132        temporary directory that autotest can be installed in. Otherwise, looks
133        for an existing installation to use; if none is found, looks for a
134        usable directory in the global config client_autodir_paths.
135        """
136        try:
137            install_dir = cls.get_installed_autodir(host)
138        except AutodirNotFoundError:
139            install_dir = cls._find_installable_dir(host)
140
141        if cls.install_in_tmpdir:
142            return host.get_tmp_dir(parent=install_dir)
143        return install_dir
144
145
146    @classmethod
147    def _find_installable_dir(cls, host):
148        client_autodir_paths = cls.get_client_autodir_paths(host)
149        for path in client_autodir_paths:
150            try:
151                host.run('mkdir -p %s' % utils.sh_escape(path))
152                host.run('test -w %s' % utils.sh_escape(path))
153                return path
154            except error.AutoservRunError:
155                logging.debug('Failed to create %s', path)
156        metrics.Counter(
157            'chromeos/autotest/errors/no_autotest_install_path').increment(
158                fields={'dut_host_name': host.hostname})
159        raise error.AutoservInstallError(
160                'Unable to find a place to install Autotest; tried %s' %
161                ', '.join(client_autodir_paths))
162
163
164    def get_fetch_location(self):
165        c = global_config.global_config
166        repos = c.get_config_value("PACKAGES", 'fetch_location', type=list,
167                                   default=[])
168        repos.reverse()
169        return repos
170
171
172    def install(self, host=None, autodir=None, use_packaging=True):
173        self._install(host=host, autodir=autodir, use_packaging=use_packaging)
174
175
176    def install_full_client(self, host=None, autodir=None):
177        self._install(host=host, autodir=autodir, use_autoserv=False,
178                      use_packaging=False)
179
180
181    def install_no_autoserv(self, host=None, autodir=None):
182        self._install(host=host, autodir=autodir, use_autoserv=False)
183
184
185    def _install_using_packaging(self, host, autodir):
186        repos = self.get_fetch_location()
187        if not repos:
188            raise error.PackageInstallError("No repos to install an "
189                                            "autotest client from")
190        pkgmgr = packages.PackageManager(autodir, hostname=host.hostname,
191                                         repo_urls=repos,
192                                         do_locking=False,
193                                         run_function=host.run,
194                                         run_function_dargs=dict(timeout=600))
195        # The packages dir is used to store all the packages that
196        # are fetched on that client. (for the tests,deps etc.
197        # too apart from the client)
198        pkg_dir = os.path.join(autodir, 'packages')
199        # clean up the autodir except for the packages directory
200        host.run('cd %s && ls | grep -v "^packages$"'
201                 ' | xargs rm -rf && rm -rf .[!.]*' % autodir)
202        pkgmgr.install_pkg('autotest', 'client', pkg_dir, autodir,
203                           preserve_install_dir=True)
204        self.installed = True
205
206
207    def _install_using_send_file(self, host, autodir):
208        dirs_to_exclude = set(["tests", "site_tests", "deps", "profilers",
209                               "packages"])
210        light_files = [os.path.join(self.source_material, f)
211                       for f in os.listdir(self.source_material)
212                       if f not in dirs_to_exclude]
213        host.send_file(light_files, autodir, delete_dest=True)
214
215        # create empty dirs for all the stuff we excluded
216        commands = []
217        for path in dirs_to_exclude:
218            abs_path = os.path.join(autodir, path)
219            abs_path = utils.sh_escape(abs_path)
220            commands.append("mkdir -p '%s'" % abs_path)
221            commands.append("touch '%s'/__init__.py" % abs_path)
222        host.run(';'.join(commands))
223
224
225    def _install(self, host=None, autodir=None, use_autoserv=True,
226                 use_packaging=True):
227        """
228        Install autotest.  If get() was not called previously, an
229        attempt will be made to install from the autotest svn
230        repository.
231
232        @param host A Host instance on which autotest will be installed
233        @param autodir Location on the remote host to install to
234        @param use_autoserv Enable install modes that depend on the client
235            running with the autoserv harness
236        @param use_packaging Enable install modes that use the packaging system
237
238        @exception AutoservError if a tarball was not specified and
239            the target host does not have svn installed in its path
240        """
241        if not host:
242            host = self.host
243        if not self.got:
244            self.get()
245        host.wait_up(timeout=30)
246        host.setup()
247        logging.info("Installing autotest on %s", host.hostname)
248
249        # set up the autotest directory on the remote machine
250        if not autodir:
251            autodir = self.get_install_dir(host)
252        logging.info('Using installation dir %s', autodir)
253        host.set_autodir(autodir)
254        host.run('mkdir -p %s' % utils.sh_escape(autodir))
255
256        # make sure there are no files in $AUTODIR/results
257        results_path = os.path.join(autodir, 'results')
258        host.run('rm -rf %s/*' % utils.sh_escape(results_path),
259                 ignore_status=True)
260
261        # Fetch the autotest client from the nearest repository
262        if use_packaging:
263            try:
264                self._install_using_packaging(host, autodir)
265                logging.info("Installation of autotest completed using the "
266                             "packaging system.")
267                return
268            except (error.PackageInstallError, error.AutoservRunError,
269                    global_config.ConfigError), e:
270                logging.info("Could not install autotest using the packaging "
271                             "system: %s. Trying other methods", e)
272        else:
273            # Delete the package checksum file to force dut updating local
274            # packages.
275            command = ('rm -f "%s"' %
276                       (os.path.join(autodir, packages.CHECKSUM_FILE)))
277            host.run(command)
278
279        # try to install from file or directory
280        if self.source_material:
281            c = global_config.global_config
282            supports_autoserv_packaging = c.get_config_value(
283                "PACKAGES", "serve_packages_from_autoserv", type=bool)
284            # Copy autotest recursively
285            if supports_autoserv_packaging and use_autoserv:
286                self._install_using_send_file(host, autodir)
287            else:
288                host.send_file(self.source_material, autodir, delete_dest=True)
289            logging.info("Installation of autotest completed from %s",
290                         self.source_material)
291            self.installed = True
292            return
293
294        # if that fails try to install using svn
295        if utils.run('which svn').exit_status:
296            raise error.AutoservError('svn not found on target machine: %s' %
297                                      host.name)
298        try:
299            host.run('svn checkout %s %s' % (AUTOTEST_SVN, autodir))
300        except error.AutoservRunError, e:
301            host.run('svn checkout %s %s' % (AUTOTEST_HTTP, autodir))
302        logging.info("Installation of autotest completed using SVN.")
303        self.installed = True
304
305
306    def uninstall(self, host=None):
307        """
308        Uninstall (i.e. delete) autotest. Removes the autotest client install
309        from the specified host.
310
311        @params host a Host instance from which the client will be removed
312        """
313        if not self.installed:
314            return
315        if not host:
316            host = self.host
317        autodir = host.get_autodir()
318        if not autodir:
319            return
320
321        # perform the actual uninstall
322        host.run("rm -rf %s" % utils.sh_escape(autodir), ignore_status=True)
323        host.set_autodir(None)
324        self.installed = False
325
326
327    def get(self, location=None):
328        if not location:
329            location = os.path.join(self.serverdir, '../client')
330            location = os.path.abspath(location)
331        # If there's stuff run on our client directory already, it
332        # can cause problems. Try giving it a quick clean first.
333        cwd = os.getcwd()
334        os.chdir(location)
335        try:
336            utils.system('tools/make_clean', ignore_status=True)
337        finally:
338            os.chdir(cwd)
339        super(BaseAutotest, self).get(location)
340        self.got = True
341
342
343    def run(self, control_file, results_dir='.', host=None, timeout=None,
344            tag=None, parallel_flag=False, background=False,
345            client_disconnect_timeout=None, use_packaging=True):
346        """
347        Run an autotest job on the remote machine.
348
349        @param control_file: An open file-like-obj of the control file.
350        @param results_dir: A str path where the results should be stored
351                on the local filesystem.
352        @param host: A Host instance on which the control file should
353                be run.
354        @param timeout: Maximum number of seconds to wait for the run or None.
355        @param tag: Tag name for the client side instance of autotest.
356        @param parallel_flag: Flag set when multiple jobs are run at the
357                same time.
358        @param background: Indicates that the client should be launched as
359                a background job; the code calling run will be responsible
360                for monitoring the client and collecting the results.
361        @param client_disconnect_timeout: Seconds to wait for the remote host
362                to come back after a reboot. Defaults to the host setting for
363                DEFAULT_REBOOT_TIMEOUT.
364
365        @raises AutotestRunError: If there is a problem executing
366                the control file.
367        """
368        host = self._get_host_and_setup(host, use_packaging=use_packaging)
369        logging.debug('Autotest job starts on remote host: %s',
370                      host.hostname)
371        results_dir = os.path.abspath(results_dir)
372
373        if client_disconnect_timeout is None:
374            client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT
375
376        if tag:
377            results_dir = os.path.join(results_dir, tag)
378
379        atrun = _Run(host, results_dir, tag, parallel_flag, background)
380        self._do_run(control_file, results_dir, host, atrun, timeout,
381                     client_disconnect_timeout, use_packaging=use_packaging)
382
383
384    def _get_host_and_setup(self, host, use_packaging=True):
385        if not host:
386            host = self.host
387        if not self.installed:
388            self.install(host, use_packaging=use_packaging)
389
390        host.wait_up(timeout=30)
391        return host
392
393
394    def _do_run(self, control_file, results_dir, host, atrun, timeout,
395                client_disconnect_timeout, use_packaging=True):
396        try:
397            atrun.verify_machine()
398        except:
399            logging.error("Verify failed on %s. Reinstalling autotest",
400                          host.hostname)
401            self.install(host)
402            atrun.verify_machine()
403        debug = os.path.join(results_dir, 'debug')
404        try:
405            os.makedirs(debug)
406        except Exception:
407            pass
408
409        delete_file_list = [atrun.remote_control_file,
410                            atrun.remote_control_file + '.state',
411                            atrun.manual_control_file,
412                            atrun.manual_control_file + '.state']
413        cmd = ';'.join('rm -f ' + control for control in delete_file_list)
414        host.run(cmd, ignore_status=True)
415
416        tmppath = utils.get(control_file, local_copy=True)
417
418        # build up the initialization prologue for the control file
419        prologue_lines = []
420
421        # Add the additional user arguments
422        prologue_lines.append("args = %r\n" % self.job.args)
423
424        # If the packaging system is being used, add the repository list.
425        repos = None
426        try:
427            if use_packaging:
428                repos = self.get_fetch_location()
429                prologue_lines.append('job.add_repository(%s)\n' % repos)
430            else:
431                logging.debug('use_packaging is set to False, do not add any '
432                              'repository.')
433        except global_config.ConfigError, e:
434            # If repos is defined packaging is enabled so log the error
435            if repos:
436                logging.error(e)
437
438        # on full-size installs, turn on any profilers the server is using
439        if not atrun.background:
440            running_profilers = host.job.profilers.add_log.iteritems()
441            for profiler, (args, dargs) in running_profilers:
442                call_args = [repr(profiler)]
443                call_args += [repr(arg) for arg in args]
444                call_args += ["%s=%r" % item for item in dargs.iteritems()]
445                prologue_lines.append("job.profilers.add(%s)\n"
446                                      % ", ".join(call_args))
447        cfile = "".join(prologue_lines)
448
449        cfile += open(tmppath).read()
450        open(tmppath, "w").write(cfile)
451
452        # Create and copy state file to remote_control_file + '.state'
453        state_file = host.job.preprocess_client_state()
454        host.send_file(state_file, atrun.remote_control_file + '.init.state')
455        os.remove(state_file)
456
457        # Copy control_file to remote_control_file on the host
458        host.send_file(tmppath, atrun.remote_control_file)
459        if os.path.abspath(tmppath) != os.path.abspath(control_file):
460            os.remove(tmppath)
461
462        atrun.execute_control(
463                timeout=timeout,
464                client_disconnect_timeout=client_disconnect_timeout)
465
466
467    @classmethod
468    def _check_client_test_result(cls, host, test_name):
469        """
470        Check result of client test.
471        Autotest will store results in the file name status.
472        We check that second to last line in that file begins with 'END GOOD'
473
474        @raises TestFail: If client test does not pass.
475        """
476        client_result_dir = '%s/results/default' % host.autodir
477        command = 'tail -2 %s/status | head -1' % client_result_dir
478        status = host.run(command).stdout.strip()
479        logging.info(status)
480        if status[:8] != 'END GOOD':
481            raise error.TestFail('%s client test did not pass.' % test_name)
482
483
484    def run_timed_test(self, test_name, results_dir='.', host=None,
485                       timeout=None, parallel_flag=False, background=False,
486                       client_disconnect_timeout=None, *args, **dargs):
487        """
488        Assemble a tiny little control file to just run one test,
489        and run it as an autotest client-side test
490        """
491        if not host:
492            host = self.host
493        if not self.installed:
494            self.install(host)
495        opts = ["%s=%s" % (o[0], repr(o[1])) for o in dargs.items()]
496        cmd = ", ".join([repr(test_name)] + map(repr, args) + opts)
497        control = "job.run_test(%s)\n" % cmd
498        self.run(control, results_dir, host, timeout=timeout,
499                 parallel_flag=parallel_flag, background=background,
500                 client_disconnect_timeout=client_disconnect_timeout)
501
502        if dargs.get('check_client_result', False):
503            self._check_client_test_result(host, test_name)
504
505
506    def run_test(self, test_name, results_dir='.', host=None,
507                 parallel_flag=False, background=False,
508                 client_disconnect_timeout=None, *args, **dargs):
509        self.run_timed_test(test_name, results_dir, host, timeout=None,
510                            parallel_flag=parallel_flag, background=background,
511                            client_disconnect_timeout=client_disconnect_timeout,
512                            *args, **dargs)
513
514
515class _BaseRun(object):
516    """
517    Represents a run of autotest control file.  This class maintains
518    all the state necessary as an autotest control file is executed.
519
520    It is not intended to be used directly, rather control files
521    should be run using the run method in Autotest.
522    """
523    def __init__(self, host, results_dir, tag, parallel_flag, background):
524        self.host = host
525        self.results_dir = results_dir
526        self.env = host.env
527        self.tag = tag
528        self.parallel_flag = parallel_flag
529        self.background = background
530        self.autodir = Autotest.get_installed_autodir(self.host)
531        control = os.path.join(self.autodir, 'control')
532        if tag:
533            control += '.' + tag
534        self.manual_control_file = control
535        self.remote_control_file = control + '.autoserv'
536        self.config_file = os.path.join(self.autodir, 'global_config.ini')
537
538
539    def verify_machine(self):
540        binary = os.path.join(self.autodir, 'bin/autotest')
541        try:
542            self.host.run('ls %s > /dev/null 2>&1' % binary)
543        except:
544            raise error.AutoservInstallError(
545                "Autotest does not appear to be installed")
546
547        if not self.parallel_flag:
548            tmpdir = os.path.join(self.autodir, 'tmp')
549            download = os.path.join(self.autodir, 'tests/download')
550            self.host.run('umount %s' % tmpdir, ignore_status=True)
551            self.host.run('umount %s' % download, ignore_status=True)
552
553
554    def get_base_cmd_args(self, section):
555        args = ['--verbose']
556        if section > 0:
557            args.append('-c')
558        if self.tag:
559            args.append('-t %s' % self.tag)
560        if self.host.job.use_external_logging():
561            args.append('-l')
562        if self.host.hostname:
563            args.append('--hostname=%s' % self.host.hostname)
564        args.append('--user=%s' % self.host.job.user)
565
566        args.append(self.remote_control_file)
567        return args
568
569
570    def get_background_cmd(self, section):
571        cmd = ['nohup', os.path.join(self.autodir, 'bin/autotest_client')]
572        cmd += self.get_base_cmd_args(section)
573        cmd += ['>/dev/null', '2>/dev/null', '&']
574        return ' '.join(cmd)
575
576
577    def get_daemon_cmd(self, section, monitor_dir):
578        cmd = ['nohup', os.path.join(self.autodir, 'bin/autotestd'),
579               monitor_dir, '-H autoserv']
580        cmd += self.get_base_cmd_args(section)
581        cmd += ['>/dev/null', '2>/dev/null', '&']
582        return ' '.join(cmd)
583
584
585    def get_monitor_cmd(self, monitor_dir, stdout_read, stderr_read):
586        cmd = [os.path.join(self.autodir, 'bin', 'autotestd_monitor'),
587               monitor_dir, str(stdout_read), str(stderr_read)]
588        return ' '.join(cmd)
589
590
591    def get_client_log(self):
592        """Find what the "next" client.* prefix should be
593
594        @returns A string of the form client.INTEGER that should be prefixed
595            to all client debug log files.
596        """
597        max_digit = -1
598        debug_dir = os.path.join(self.results_dir, 'debug')
599        client_logs = glob.glob(os.path.join(debug_dir, 'client.*.*'))
600        for log in client_logs:
601            _, number, _ = log.split('.', 2)
602            if number.isdigit():
603                max_digit = max(max_digit, int(number))
604        return 'client.%d' % (max_digit + 1)
605
606
607    def copy_client_config_file(self, client_log_prefix=None):
608        """
609        Create and copy the client config file based on the server config.
610
611        @param client_log_prefix: Optional prefix to prepend to log files.
612        """
613        client_config_file = self._create_client_config_file(client_log_prefix)
614        self.host.send_file(client_config_file, self.config_file)
615        os.remove(client_config_file)
616
617
618    def _create_client_config_file(self, client_log_prefix=None):
619        """
620        Create a temporary file with the [CLIENT] section configuration values
621        taken from the server global_config.ini.
622
623        @param client_log_prefix: Optional prefix to prepend to log files.
624
625        @return: Path of the temporary file generated.
626        """
627        config = global_config.global_config.get_section_values('CLIENT')
628        if client_log_prefix:
629            config.set('CLIENT', 'default_logging_name', client_log_prefix)
630        return self._create_aux_file(config.write)
631
632
633    def _create_aux_file(self, func, *args):
634        """
635        Creates a temporary file and writes content to it according to a
636        content creation function. The file object is appended to *args, which
637        is then passed to the content creation function
638
639        @param func: Function that will be used to write content to the
640                temporary file.
641        @param *args: List of parameters that func takes.
642        @return: Path to the temporary file that was created.
643        """
644        fd, path = tempfile.mkstemp(dir=self.host.job.tmpdir)
645        aux_file = os.fdopen(fd, "w")
646        try:
647            list_args = list(args)
648            list_args.append(aux_file)
649            func(*list_args)
650        finally:
651            aux_file.close()
652        return path
653
654
655    @staticmethod
656    def is_client_job_finished(last_line):
657        return bool(re.match(r'^END .*\t----\t----\t.*$', last_line))
658
659
660    @staticmethod
661    def is_client_job_rebooting(last_line):
662        return bool(re.match(r'^\t*GOOD\t----\treboot\.start.*$', last_line))
663
664
665    def _diagnose_dut(self, old_boot_id=None):
666        """
667        Run diagnostic checks on a DUT.
668
669        1. ping: A dead host will not respond to pings.
670        2. ssh (happens with 3.): DUT hangs usually fail in authentication
671            but respond to pings.
672        3. Check if a reboot occured: A healthy but unexpected reboot leaves the
673            host running with a new boot id.
674
675        This method will always raise an exception from the AutotestFailure
676        family and should only get called when the reason for a test failing
677        is ambiguous.
678
679        @raises AutotestDeviceNotPingable: If the DUT doesn't respond to ping.
680        @raises AutotestDeviceNotSSHable: If we cannot SSH into the DUT.
681        @raises AutotestDeviceRebooted: If the boot id changed.
682        @raises AutotestAbort: If none of the above exceptions were raised.
683            Since we have no recourse we must abort at this stage.
684        """
685        msg = 'Autotest client terminated unexpectedly: '
686        if utils.ping(self.host.hostname, tries=1, deadline=1) != 0:
687            msg += 'DUT is no longer pingable, it may have rebooted or hung.\n'
688            raise AutotestDeviceNotPingable(msg)
689
690        if old_boot_id:
691            try:
692                new_boot_id = self.host.get_boot_id(timeout=60)
693            except Exception as e:
694                msg += ('DUT is pingable but not SSHable, it most likely'
695                        ' sporadically rebooted during testing. %s\n' % str(e))
696                raise AutotestDeviceNotSSHable(msg)
697            else:
698                if new_boot_id != old_boot_id:
699                    msg += 'DUT rebooted during the test run.\n'
700                    raise AutotestDeviceRebooted(msg)
701
702            msg += ('DUT is pingable, SSHable and did NOT restart '
703                    'un-expectedly. We probably lost connectivity during the '
704                    'test.')
705        else:
706            msg += ('DUT is pingable, could not determine if an un-expected '
707                    'reboot occured during the test.')
708
709        raise AutotestAbort(msg)
710
711
712    def log_unexpected_abort(self, stderr_redirector, old_boot_id=None):
713        """
714        Logs that something unexpected happened, then tries to diagnose the
715        failure. The purpose of this function is only to close out the status
716        log with the appropriate error message, not to critically terminate
717        the program.
718
719        @param stderr_redirector: log stream.
720        @param old_boot_id: boot id used to infer if a reboot occured.
721        """
722        stderr_redirector.flush_all_buffers()
723        try:
724            self._diagnose_dut(old_boot_id)
725        except AutotestFailure as e:
726            self.host.job.record('END ABORT', None, None, str(e))
727
728
729    def _execute_in_background(self, section, timeout):
730        full_cmd = self.get_background_cmd(section)
731        devnull = open(os.devnull, "w")
732
733        self.copy_client_config_file(self.get_client_log())
734
735        self.host.job.push_execution_context(self.results_dir)
736        try:
737            result = self.host.run(full_cmd, ignore_status=True,
738                                   timeout=timeout,
739                                   stdout_tee=devnull,
740                                   stderr_tee=devnull)
741        finally:
742            self.host.job.pop_execution_context()
743
744        return result
745
746
747    @staticmethod
748    def _strip_stderr_prologue(stderr):
749        """Strips the 'standard' prologue that get pre-pended to every
750        remote command and returns the text that was actually written to
751        stderr by the remote command."""
752        stderr_lines = stderr.split("\n")[1:]
753        if not stderr_lines:
754            return ""
755        elif stderr_lines[0].startswith("NOTE: autotestd_monitor"):
756            del stderr_lines[0]
757        return "\n".join(stderr_lines)
758
759
760    def _execute_daemon(self, section, timeout, stderr_redirector,
761                        client_disconnect_timeout):
762        monitor_dir = self.host.get_tmp_dir()
763        daemon_cmd = self.get_daemon_cmd(section, monitor_dir)
764
765        # grab the location for the server-side client log file
766        client_log_prefix = self.get_client_log()
767        client_log_path = os.path.join(self.results_dir, 'debug',
768                                       client_log_prefix + '.log')
769        client_log = open(client_log_path, 'w', 0)
770        self.copy_client_config_file(client_log_prefix)
771
772        stdout_read = stderr_read = 0
773        self.host.job.push_execution_context(self.results_dir)
774        try:
775            self.host.run(daemon_cmd, ignore_status=True, timeout=timeout)
776            disconnect_warnings = []
777            while True:
778                monitor_cmd = self.get_monitor_cmd(monitor_dir, stdout_read,
779                                                   stderr_read)
780                try:
781                    result = self.host.run(monitor_cmd, ignore_status=True,
782                                           timeout=timeout,
783                                           stdout_tee=client_log,
784                                           stderr_tee=stderr_redirector)
785                except error.AutoservRunError, e:
786                    result = e.result_obj
787                    result.exit_status = None
788                    disconnect_warnings.append(e.description)
789
790                    stderr_redirector.log_warning(
791                        "Autotest client was disconnected: %s" % e.description,
792                        "NETWORK")
793                except error.AutoservSSHTimeout:
794                    result = utils.CmdResult(monitor_cmd, "", "", None, 0)
795                    stderr_redirector.log_warning(
796                        "Attempt to connect to Autotest client timed out",
797                        "NETWORK")
798
799                stdout_read += len(result.stdout)
800                stderr_read += len(self._strip_stderr_prologue(result.stderr))
801
802                if result.exit_status is not None:
803                    # TODO (crosbug.com/38224)- sbasi: Remove extra logging.
804                    logging.debug('Result exit status is %d.',
805                                  result.exit_status)
806                    return result
807                elif not self.host.wait_up(client_disconnect_timeout):
808                    raise error.AutoservSSHTimeout(
809                        "client was disconnected, reconnect timed out")
810        finally:
811            client_log.close()
812            self.host.job.pop_execution_context()
813
814
815    def execute_section(self, section, timeout, stderr_redirector,
816                        client_disconnect_timeout):
817        logging.info("Executing %s/bin/autotest %s/control phase %d",
818                     self.autodir, self.autodir, section)
819
820        if self.background:
821            result = self._execute_in_background(section, timeout)
822        else:
823            result = self._execute_daemon(section, timeout, stderr_redirector,
824                                          client_disconnect_timeout)
825
826        last_line = stderr_redirector.last_line
827
828        # check if we failed hard enough to warrant an exception
829        if result.exit_status == 1:
830            err = error.AutotestRunError("client job was aborted")
831        elif not self.background and not result.stderr:
832            err = error.AutotestRunError(
833                "execute_section %s failed to return anything\n"
834                "stdout:%s\n" % (section, result.stdout))
835        else:
836            err = None
837
838        # log something if the client failed AND never finished logging
839        if err and not self.is_client_job_finished(last_line):
840            self.log_unexpected_abort(stderr_redirector)
841
842        if err:
843            raise err
844        else:
845            return stderr_redirector.last_line
846
847
848    def _wait_for_reboot(self, old_boot_id):
849        logging.info("Client is rebooting")
850        logging.info("Waiting for client to halt")
851        if not self.host.wait_down(self.host.WAIT_DOWN_REBOOT_TIMEOUT,
852                                   old_boot_id=old_boot_id):
853            err = "%s failed to shutdown after %d"
854            err %= (self.host.hostname, self.host.WAIT_DOWN_REBOOT_TIMEOUT)
855            raise error.AutotestRunError(err)
856        logging.info("Client down, waiting for restart")
857        if not self.host.wait_up(self.host.DEFAULT_REBOOT_TIMEOUT):
858            # since reboot failed
859            # hardreset the machine once if possible
860            # before failing this control file
861            warning = "%s did not come back up, hard resetting"
862            warning %= self.host.hostname
863            logging.warning(warning)
864            try:
865                self.host.hardreset(wait=False)
866            except (AttributeError, error.AutoservUnsupportedError):
867                warning = "Hard reset unsupported on %s"
868                warning %= self.host.hostname
869                logging.warning(warning)
870            raise error.AutotestRunError("%s failed to boot after %ds" %
871                                         (self.host.hostname,
872                                          self.host.DEFAULT_REBOOT_TIMEOUT))
873        self.host.reboot_followup()
874
875
876    def execute_control(self, timeout=None, client_disconnect_timeout=None):
877        if not self.background:
878            collector = log_collector(self.host, self.tag, self.results_dir)
879            hostname = self.host.hostname
880            remote_results = collector.client_results_dir
881            local_results = collector.server_results_dir
882            self.host.job.add_client_log(hostname, remote_results,
883                                         local_results)
884            job_record_context = self.host.job.get_record_context()
885
886        section = 0
887        start_time = time.time()
888
889        logger = client_logger(self.host, self.tag, self.results_dir)
890        try:
891            while not timeout or time.time() < start_time + timeout:
892                if timeout:
893                    section_timeout = start_time + timeout - time.time()
894                else:
895                    section_timeout = None
896                boot_id = self.host.get_boot_id()
897                last = self.execute_section(section, section_timeout,
898                                            logger, client_disconnect_timeout)
899                if self.background:
900                    return
901                section += 1
902                if self.is_client_job_finished(last):
903                    logging.info("Client complete")
904                    return
905                elif self.is_client_job_rebooting(last):
906                    try:
907                        self._wait_for_reboot(boot_id)
908                    except error.AutotestRunError, e:
909                        self.host.job.record("ABORT", None, "reboot", str(e))
910                        self.host.job.record("END ABORT", None, None, str(e))
911                        raise
912                    continue
913
914                # If a test fails without probable cause we try to bucket it's
915                # failure into one of 2 categories. If we can determine the
916                # current state of the device and it is suspicious, we close the
917                # status lines indicating a failure. If we either cannot
918                # determine the state of the device, or it appears totally
919                # healthy, we give up and abort.
920                try:
921                    self._diagnose_dut(boot_id)
922                except AutotestDeviceError as e:
923                    # The status lines of the test are pretty much tailed to
924                    # our log, with indentation, from the client job on the DUT.
925                    # So if the DUT goes down unexpectedly we'll end up with a
926                    # malformed status log unless we manually unwind the status
927                    # stack. Ideally we would want to write a nice wrapper like
928                    # server_job methods run_reboot, run_group but they expect
929                    # reboots and we don't.
930                    self.host.job.record('FAIL', None, None, str(e))
931                    self.host.job.record('END FAIL', None, None)
932                    self.host.job.record('END GOOD', None, None)
933                    self.host.job.failed_with_device_error = True
934                    return
935                except AutotestAbort as e:
936                    self.host.job.record('ABORT', None, None, str(e))
937                    self.host.job.record('END ABORT', None, None)
938
939                    # give the client machine a chance to recover from a crash
940                    self.host.wait_up(
941                        self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
942                    msg = ("Aborting - unexpected final status message from "
943                           "client on %s: %s\n") % (self.host.hostname, last)
944                    raise error.AutotestRunError(msg)
945        finally:
946            logging.debug('Autotest job finishes running. Below is the '
947                          'post-processing operations.')
948            logger.close()
949            if not self.background:
950                collector.collect_client_job_results()
951                collector.remove_redundant_client_logs()
952                state_file = os.path.basename(self.remote_control_file
953                                              + '.state')
954                state_path = os.path.join(self.results_dir, state_file)
955                self.host.job.postprocess_client_state(state_path)
956                self.host.job.remove_client_log(hostname, remote_results,
957                                                local_results)
958                job_record_context.restore()
959
960            logging.debug('Autotest job finishes.')
961
962        # should only get here if we timed out
963        assert timeout
964        raise error.AutotestTimeoutError()
965
966
967class log_collector(object):
968    def __init__(self, host, client_tag, results_dir):
969        self.host = host
970        if not client_tag:
971            client_tag = "default"
972        self.client_results_dir = os.path.join(host.get_autodir(), "results",
973                                               client_tag)
974        self.server_results_dir = results_dir
975
976
977    def collect_client_job_results(self):
978        """ A method that collects all the current results of a running
979        client job into the results dir. By default does nothing as no
980        client job is running, but when running a client job you can override
981        this with something that will actually do something. """
982
983        # make an effort to wait for the machine to come up
984        try:
985            self.host.wait_up(timeout=30)
986        except error.AutoservError:
987            # don't worry about any errors, we'll try and
988            # get the results anyway
989            pass
990
991        # Copy all dirs in default to results_dir
992        try:
993            with metrics.SecondsTimer(
994                    'chromeos/autotest/job/log_collection_duration',
995                    fields={'dut_host_name': self.host.hostname}):
996                self.host.get_file(
997                        self.client_results_dir + '/',
998                        self.server_results_dir,
999                        preserve_symlinks=True)
1000        except Exception:
1001            # well, don't stop running just because we couldn't get logs
1002            e_msg = "Unexpected error copying test result logs, continuing ..."
1003            logging.error(e_msg)
1004            traceback.print_exc(file=sys.stdout)
1005
1006
1007    def remove_redundant_client_logs(self):
1008        """Remove client.*.log files in favour of client.*.DEBUG files."""
1009        debug_dir = os.path.join(self.server_results_dir, 'debug')
1010        debug_files = [f for f in os.listdir(debug_dir)
1011                       if re.search(r'^client\.\d+\.DEBUG$', f)]
1012        for debug_file in debug_files:
1013            log_file = debug_file.replace('DEBUG', 'log')
1014            log_file = os.path.join(debug_dir, log_file)
1015            if os.path.exists(log_file):
1016                os.remove(log_file)
1017
1018
1019# a file-like object for catching stderr from an autotest client and
1020# extracting status logs from it
1021class BaseClientLogger(object):
1022    """Partial file object to write to both stdout and
1023    the status log file.  We only implement those methods
1024    utils.run() actually calls.
1025    """
1026    status_parser = re.compile(r"^AUTOTEST_STATUS:([^:]*):(.*)$")
1027    test_complete_parser = re.compile(r"^AUTOTEST_TEST_COMPLETE:(.*)$")
1028    fetch_package_parser = re.compile(
1029        r"^AUTOTEST_FETCH_PACKAGE:([^:]*):([^:]*):(.*)$")
1030    extract_indent = re.compile(r"^(\t*).*$")
1031    extract_timestamp = re.compile(r".*\ttimestamp=(\d+)\t.*$")
1032
1033    def __init__(self, host, tag, server_results_dir):
1034        self.host = host
1035        self.job = host.job
1036        self.log_collector = log_collector(host, tag, server_results_dir)
1037        self.leftover = ""
1038        self.last_line = ""
1039        self.logs = {}
1040
1041
1042    def _process_log_dict(self, log_dict):
1043        log_list = log_dict.pop("logs", [])
1044        for key in sorted(log_dict.iterkeys()):
1045            log_list += self._process_log_dict(log_dict.pop(key))
1046        return log_list
1047
1048
1049    def _process_logs(self):
1050        """Go through the accumulated logs in self.log and print them
1051        out to stdout and the status log. Note that this processes
1052        logs in an ordering where:
1053
1054        1) logs to different tags are never interleaved
1055        2) logs to x.y come before logs to x.y.z for all z
1056        3) logs to x.y come before x.z whenever y < z
1057
1058        Note that this will in general not be the same as the
1059        chronological ordering of the logs. However, if a chronological
1060        ordering is desired that one can be reconstructed from the
1061        status log by looking at timestamp lines."""
1062        log_list = self._process_log_dict(self.logs)
1063        for entry in log_list:
1064            self.job.record_entry(entry, log_in_subdir=False)
1065        if log_list:
1066            self.last_line = log_list[-1].render()
1067
1068
1069    def _process_quoted_line(self, tag, line):
1070        """Process a line quoted with an AUTOTEST_STATUS flag. If the
1071        tag is blank then we want to push out all the data we've been
1072        building up in self.logs, and then the newest line. If the
1073        tag is not blank, then push the line into the logs for handling
1074        later."""
1075        entry = base_job.status_log_entry.parse(line)
1076        if entry is None:
1077            return  # the line contains no status lines
1078        if tag == "":
1079            self._process_logs()
1080            self.job.record_entry(entry, log_in_subdir=False)
1081            self.last_line = line
1082        else:
1083            tag_parts = [int(x) for x in tag.split(".")]
1084            log_dict = self.logs
1085            for part in tag_parts:
1086                log_dict = log_dict.setdefault(part, {})
1087            log_list = log_dict.setdefault("logs", [])
1088            log_list.append(entry)
1089
1090
1091    def _process_info_line(self, line):
1092        """Check if line is an INFO line, and if it is, interpret any control
1093        messages (e.g. enabling/disabling warnings) that it may contain."""
1094        match = re.search(r"^\t*INFO\t----\t----(.*)\t[^\t]*$", line)
1095        if not match:
1096            return   # not an INFO line
1097        for field in match.group(1).split('\t'):
1098            if field.startswith("warnings.enable="):
1099                func = self.job.warning_manager.enable_warnings
1100            elif field.startswith("warnings.disable="):
1101                func = self.job.warning_manager.disable_warnings
1102            else:
1103                continue
1104            warning_type = field.split("=", 1)[1]
1105            func(warning_type)
1106
1107
1108    def _process_line(self, line):
1109        """Write out a line of data to the appropriate stream. Status
1110        lines sent by autotest will be prepended with
1111        "AUTOTEST_STATUS", and all other lines are ssh error
1112        messages."""
1113        status_match = self.status_parser.search(line)
1114        test_complete_match = self.test_complete_parser.search(line)
1115        fetch_package_match = self.fetch_package_parser.search(line)
1116        if status_match:
1117            tag, line = status_match.groups()
1118            self._process_info_line(line)
1119            self._process_quoted_line(tag, line)
1120        elif test_complete_match:
1121            self._process_logs()
1122            fifo_path, = test_complete_match.groups()
1123            try:
1124                self.log_collector.collect_client_job_results()
1125                self.host.run("echo A > %s" % fifo_path)
1126            except Exception:
1127                msg = "Post-test log collection failed, continuing anyway"
1128                logging.exception(msg)
1129        elif fetch_package_match:
1130            pkg_name, dest_path, fifo_path = fetch_package_match.groups()
1131            serve_packages = global_config.global_config.get_config_value(
1132                "PACKAGES", "serve_packages_from_autoserv", type=bool)
1133            if serve_packages and pkg_name.endswith(".tar.bz2"):
1134                try:
1135                    self._send_tarball(pkg_name, dest_path)
1136                except Exception:
1137                    msg = "Package tarball creation failed, continuing anyway"
1138                    logging.exception(msg)
1139            try:
1140                self.host.run("echo B > %s" % fifo_path)
1141            except Exception:
1142                msg = "Package tarball installation failed, continuing anyway"
1143                logging.exception(msg)
1144        else:
1145            logging.info(line)
1146
1147
1148    def _send_tarball(self, pkg_name, remote_dest):
1149        name, pkg_type = self.job.pkgmgr.parse_tarball_name(pkg_name)
1150        src_dirs = []
1151        if pkg_type == 'test':
1152            for test_dir in ['site_tests', 'tests']:
1153                src_dir = os.path.join(self.job.clientdir, test_dir, name)
1154                if os.path.exists(src_dir):
1155                    src_dirs += [src_dir]
1156                    if autoserv_prebuild:
1157                        prebuild.setup(self.job.clientdir, src_dir)
1158                    break
1159        elif pkg_type == 'profiler':
1160            src_dirs += [os.path.join(self.job.clientdir, 'profilers', name)]
1161            if autoserv_prebuild:
1162                prebuild.setup(self.job.clientdir, src_dir)
1163        elif pkg_type == 'dep':
1164            src_dirs += [os.path.join(self.job.clientdir, 'deps', name)]
1165        elif pkg_type == 'client':
1166            return  # you must already have a client to hit this anyway
1167        else:
1168            return  # no other types are supported
1169
1170        # iterate over src_dirs until we find one that exists, then tar it
1171        for src_dir in src_dirs:
1172            if os.path.exists(src_dir):
1173                try:
1174                    logging.info('Bundling %s into %s', src_dir, pkg_name)
1175                    temp_dir = autotemp.tempdir(unique_id='autoserv-packager',
1176                                                dir=self.job.tmpdir)
1177                    tarball_path = self.job.pkgmgr.tar_package(
1178                        pkg_name, src_dir, temp_dir.name, " .")
1179                    self.host.send_file(tarball_path, remote_dest)
1180                finally:
1181                    temp_dir.clean()
1182                return
1183
1184
1185    def log_warning(self, msg, warning_type):
1186        """Injects a WARN message into the current status logging stream."""
1187        timestamp = int(time.time())
1188        if self.job.warning_manager.is_valid(timestamp, warning_type):
1189            self.job.record('WARN', None, None, msg)
1190
1191
1192    def write(self, data):
1193        # now start processing the existing buffer and the new data
1194        data = self.leftover + data
1195        lines = data.split('\n')
1196        processed_lines = 0
1197        try:
1198            # process all the buffered data except the last line
1199            # ignore the last line since we may not have all of it yet
1200            for line in lines[:-1]:
1201                self._process_line(line)
1202                processed_lines += 1
1203        finally:
1204            # save any unprocessed lines for future processing
1205            self.leftover = '\n'.join(lines[processed_lines:])
1206
1207
1208    def flush(self):
1209        sys.stdout.flush()
1210
1211
1212    def flush_all_buffers(self):
1213        if self.leftover:
1214            self._process_line(self.leftover)
1215            self.leftover = ""
1216        self._process_logs()
1217        self.flush()
1218
1219
1220    def close(self):
1221        self.flush_all_buffers()
1222
1223
1224SiteAutotest = client_utils.import_site_class(
1225    __file__, "autotest_lib.server.site_autotest", "SiteAutotest",
1226    BaseAutotest)
1227
1228
1229_SiteRun = client_utils.import_site_class(
1230    __file__, "autotest_lib.server.site_autotest", "_SiteRun", _BaseRun)
1231
1232
1233SiteClientLogger = client_utils.import_site_class(
1234    __file__, "autotest_lib.server.site_autotest", "SiteClientLogger",
1235    BaseClientLogger)
1236
1237
1238class Autotest(SiteAutotest):
1239    pass
1240
1241
1242class client_logger(SiteClientLogger):
1243    pass
1244
1245
1246class _Run(_SiteRun):
1247    pass
1248