1# Copyright 2014 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import bz2
6import glob
7import json
8import logging
9import os
10import re
11import shutil
12import tempfile
13import time
14import xml.etree.ElementTree as et
15from autotest_lib.client.bin import test, utils
16from autotest_lib.client.common_lib import error
17from autotest_lib.client.cros import cros_logging, service_stopper
18from autotest_lib.client.cros.graphics import graphics_utils
19
20RERUN_RATIO = 0.02  # Ratio to rerun failing test for hasty mode
21
22
23class graphics_dEQP(graphics_utils.GraphicsTest):
24    """Run the drawElements Quality Program test suite."""
25    version = 1
26    _services = None
27    _hasty = False
28    _hasty_batch_size = 100  # Batch size in hasty mode.
29    _shard_number = 0
30    _shard_count = 1
31    _board = None
32    _cpu_type = None
33    _gpu_type = None
34    _surface = None
35    _filter = None
36    _width = 256  # Use smallest width for which all tests run/pass.
37    _height = 256  # Use smallest height for which all tests run/pass.
38    _timeout = 70  # Larger than twice the dEQP watchdog timeout at 30s.
39    _test_names = None
40    _test_names_file = None
41    _log_path = None  # Location for detailed test output logs (in /tmp/).
42    _debug = False  # Analyze kernel messages.
43    _log_reader = None  # Reader to analyze (kernel) messages log.
44    _log_filter = re.compile('.* .* kernel:')  # kernel messages filter.
45    _env = None  # environment for test processes
46    DEQP_MODULES = {
47        'dEQP-EGL': 'egl',
48        'dEQP-GLES2': 'gles2',
49        'dEQP-GLES3': 'gles3',
50        'dEQP-GLES31': 'gles31',
51        'dEQP-VK': 'vk',
52    }
53    # We do not consider these results as failures.
54    TEST_RESULT_FILTER = [
55        'pass', 'notsupported', 'internalerror', 'qualitywarning',
56        'compatibilitywarning', 'skipped'
57    ]
58
59    def initialize(self):
60        super(graphics_dEQP, self).initialize()
61        self._api_helper = graphics_utils.GraphicsApiHelper()
62        self._board = utils.get_board()
63        self._cpu_type = utils.get_cpu_soc_family()
64        self._gpu_type = utils.get_gpu_family()
65
66        # deqp may depend on libraries that are present only on test images.
67        # Those libraries are installed in /usr/local.
68        self._env = os.environ.copy()
69        old_ld_path = self._env.get('LD_LIBRARY_PATH', '')
70        if old_ld_path:
71            self._env[
72                'LD_LIBRARY_PATH'] = '/usr/local/lib:/usr/local/lib64:' + old_ld_path
73        else:
74            self._env['LD_LIBRARY_PATH'] = '/usr/local/lib:/usr/local/lib64'
75
76        self._services = service_stopper.ServiceStopper(['ui', 'powerd'])
77        # Valid choices are fbo and pbuffer. The latter avoids dEQP assumptions.
78        self._surface = 'pbuffer'
79        self._services.stop_services()
80
81    def cleanup(self):
82        if self._services:
83            self._services.restore_services()
84        super(graphics_dEQP, self).cleanup()
85
86    def _archive_test_results(self, result_filename):
87        """Reduce space usage.
88
89        The default /tmp result file location is memory backed and capped at 1/2
90        of main memory. We have experienced out of storage situations. Avoid
91        this for instance by using compression.
92        """
93        os.system('gzip %s' % result_filename)
94
95    def _parse_test_results(self,
96                            result_filename,
97                            test_results=None,
98                            failing_test=None):
99        """Handles result files with one or more test results.
100
101        @param result_filename: log file to parse.
102        @param test_results: Result parsed will be appended to it.
103        @param failing_test: Tests considered failed will append to it.
104
105        @return: dictionary of parsed test results.
106        """
107        xml = ''
108        xml_start = False
109        xml_complete = False
110        xml_bad = False
111        result = 'ParseTestResultFail'
112
113        if test_results is None:
114            test_results = {}
115
116        if not os.path.isfile(result_filename):
117            logging.error('Did not find file %s', result_filename)
118            return test_results
119
120        with open(result_filename) as result_file:
121            for line in result_file.readlines():
122                # If the test terminates early, the XML will be incomplete
123                # and should not be parsed.
124                if line.startswith('#terminateTestCaseResult'):
125                    result = line.strip().split()[1]
126                    xml_bad = True
127                # Will only see #endTestCaseResult if the test does not
128                # terminate early.
129                elif line.startswith('#endTestCaseResult'):
130                    xml_complete = True
131                elif xml_start:
132                    xml += line
133                elif line.startswith('#beginTestCaseResult'):
134                    # If we see another begin before an end then something is
135                    # wrong.
136                    if xml_start:
137                        xml_bad = True
138                    else:
139                        xml_start = True
140                        test_case = line.split(' ')[1]
141
142                if xml_complete or xml_bad:
143                    if xml_complete:
144                        myparser = et.XMLParser(encoding='ISO-8859-1')
145                        root = et.fromstring(xml, parser=myparser)
146                        test_case = root.attrib['CasePath']
147                        result = root.find('Result').get('StatusCode').strip()
148                        xml_complete = False
149                    test_results[result] = test_results.get(result, 0) + 1
150                    if (result.lower() not in self.TEST_RESULT_FILTER and
151                            failing_test != None):
152                        failing_test.append(test_case)
153                    xml_bad = False
154                    xml_start = False
155                    result = 'ParseTestResultFail'
156                    xml = ''
157
158        return test_results
159
160    def _load_not_passing_cases(self, test_filter):
161        """Load all test cases that are in non-'Pass' expectations."""
162        not_passing_cases = []
163        expectations_dir = os.path.join(self.bindir, 'expectations',
164                                        self._gpu_type)
165        subset_spec = '%s.*' % test_filter
166        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
167        for subset_file in subset_paths:
168            # Filter against extra hasty failures only in hasty mode.
169            if (not '.Pass.bz2' in subset_file and
170                (self._hasty or '.hasty.' not in subset_file)):
171                not_passing_cases.extend(
172                    bz2.BZ2File(subset_file).read().splitlines())
173        not_passing_cases.sort()
174        return not_passing_cases
175
176    def _translate_name_to_api(self, name):
177        """Translate test_names or test_filter to api."""
178        test_prefix = name.split('.')[0]
179        if test_prefix in self.DEQP_MODULES:
180            api = self.DEQP_MODULES[test_prefix]
181        else:
182            raise error.TestFail('Failed: Invalid test name: %s' % name)
183        return api
184
185    def _get_executable(self, api):
186        """Return the executable path of the api."""
187        return self._api_helper.get_deqp_executable(api)
188
189    def _can_run(self, api):
190        """Check if specific api is supported in this board."""
191        return api in self._api_helper.get_supported_apis()
192
193    def _bootstrap_new_test_cases(self, test_filter):
194        """Ask dEQP for all test cases and removes non-Pass'ing ones.
195
196        This function will query dEQP for test cases and remove all cases that
197        are not in 'Pass'ing expectations from the list. This can be used
198        incrementally updating failing/hangin tests over several runs.
199
200        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.
201
202        @return: List of dEQP tests to run.
203        """
204        test_cases = []
205        api = self._translate_name_to_api(test_filter)
206        if self._can_run(api):
207            executable = self._get_executable(api)
208        else:
209            return test_cases
210
211        # Must be in the executable directory when running for it to find it's
212        # test data files!
213        os.chdir(os.path.dirname(executable))
214
215        not_passing_cases = self._load_not_passing_cases(test_filter)
216        # We did not find passing cases in expectations. Assume everything else
217        # that is there should not be run this time.
218        expectations_dir = os.path.join(self.bindir, 'expectations',
219                                        self._gpu_type)
220        subset_spec = '%s.*' % test_filter
221        subset_paths = glob.glob(os.path.join(expectations_dir, subset_spec))
222        for subset_file in subset_paths:
223            # Filter against hasty failures only in hasty mode.
224            if self._hasty or '.hasty.' not in subset_file:
225                not_passing_cases.extend(
226                    bz2.BZ2File(subset_file).read().splitlines())
227
228        # Now ask dEQP executable nicely for whole list of tests. Needs to be
229        # run in executable directory. Output file is plain text file named
230        # e.g. 'dEQP-GLES2-cases.txt'.
231        command = ('%s '
232                   '--deqp-runmode=txt-caselist '
233                   '--deqp-surface-type=%s '
234                   '--deqp-gl-config-name=rgba8888d24s8ms0 ' %
235                   (executable, self._surface))
236        logging.info('Running command %s', command)
237        utils.run(
238            command,
239            env=self._env,
240            timeout=60,
241            stderr_is_expected=False,
242            ignore_status=False,
243            stdin=None)
244
245        # Now read this caselist file.
246        caselist_name = '%s-cases.txt' % test_filter.split('.')[0]
247        caselist_file = os.path.join(os.path.dirname(executable), caselist_name)
248        if not os.path.isfile(caselist_file):
249            raise error.TestFail(
250                'Failed: No caselist file at %s!' % caselist_file)
251
252        # And remove non-Pass'ing expectations from caselist.
253        caselist = open(caselist_file).read().splitlines()
254        # Contains lines like "TEST: dEQP-GLES2.capability"
255        test_cases = []
256        match = 'TEST: %s' % test_filter
257        logging.info('Bootstrapping test cases matching "%s".', match)
258        for case in caselist:
259            if case.startswith(match):
260                case = case.split('TEST: ')[1]
261                test_cases.append(case)
262
263        test_cases = list(set(test_cases) - set(not_passing_cases))
264        if not test_cases:
265            raise error.TestFail(
266                'Failed: Unable to bootstrap %s!' % test_filter)
267
268        test_cases.sort()
269        return test_cases
270
271    def _get_test_cases_from_names_file(self):
272        if os.path.exists(self._test_names_file):
273            file_path = self._test_names_file
274            test_cases = [line.rstrip('\n') for line in open(file_path)]
275            return [test for test in test_cases if test and not test.isspace()]
276        return []
277
278    def _get_test_cases(self, test_filter, subset):
279        """Gets the test cases for 'Pass', 'Fail' etc.
280
281        expectations.
282
283        This function supports bootstrapping of new GPU families and dEQP
284        binaries. In particular if there are not 'Pass' expectations found for
285        this GPU family it will query the dEQP executable for a list of all
286        available tests. It will then remove known non-'Pass'ing tests from
287        this list to avoid getting into hangs/crashes etc.
288
289        @param test_filter: string like 'dEQP-GLES2.info', 'dEQP-GLES3.stress'.
290        @param subset: string from 'Pass', 'Fail', 'Timeout' etc.
291
292        @return: List of dEQP tests to run.
293        """
294        expectations_dir = os.path.join(self.bindir, 'expectations',
295                                        self._gpu_type)
296        subset_name = '%s.%s.bz2' % (test_filter, subset)
297        subset_path = os.path.join(expectations_dir, subset_name)
298        if not os.path.isfile(subset_path):
299            if subset == 'NotPass':
300                # TODO(ihf): Running hasty and NotPass together is an invitation
301                # for trouble (stability). Decide if it should be disallowed.
302                return self._load_not_passing_cases(test_filter)
303            if subset != 'Pass':
304                raise error.TestFail(
305                    'Failed: No subset file found for %s!' % subset_path)
306            # Ask dEQP for all cases and remove the failing ones.
307            return self._bootstrap_new_test_cases(test_filter)
308
309        test_cases = bz2.BZ2File(subset_path).read().splitlines()
310        if not test_cases:
311            raise error.TestFail(
312                'Failed: No test cases found in subset file %s!' % subset_path)
313        return test_cases
314
315    def _run_tests_individually(self, test_cases, failing_test=None):
316        """Runs tests as isolated from each other, but slowly.
317
318        This function runs each test case separately as a command.
319        This means a new context for each test etc. Failures will be more
320        isolated, but runtime quite high due to overhead.
321
322        @param test_cases: List of dEQP test case strings.
323        @param failing_test: Tests considered failed will be appended to it.
324
325        @return: dictionary of test results.
326        """
327        test_results = {}
328        width = self._width
329        height = self._height
330
331        i = 0
332        for test_case in test_cases:
333            i += 1
334            logging.info('[%d/%d] TestCase: %s', i, len(test_cases), test_case)
335            result_prefix = os.path.join(self._log_path, test_case)
336            log_file = '%s.log' % result_prefix
337            debug_file = '%s.debug' % result_prefix
338            api = self._translate_name_to_api(test_case)
339            if not self._can_run(api):
340                result = 'Skipped'
341                logging.info('Skipping on %s: %s', self._gpu_type, test_case)
342            else:
343                executable = self._get_executable(api)
344                command = ('%s '
345                           '--deqp-case=%s '
346                           '--deqp-surface-type=%s '
347                           '--deqp-gl-config-name=rgba8888d24s8ms0 '
348                           '--deqp-log-images=disable '
349                           '--deqp-watchdog=enable '
350                           '--deqp-surface-width=%d '
351                           '--deqp-surface-height=%d '
352                           '--deqp-log-filename=%s' %
353                           (executable, test_case, self._surface, width, height,
354                            log_file))
355                logging.debug('Running single: %s', command)
356
357                # Must be in the executable directory when running for it to find it's
358                # test data files!
359                os.chdir(os.path.dirname(executable))
360
361                # Must initialize because some errors don't repopulate
362                # run_result, leaving old results.
363                run_result = {}
364                start_time = time.time()
365                try:
366                    run_result = utils.run(
367                        command,
368                        env=self._env,
369                        timeout=self._timeout,
370                        stderr_is_expected=False,
371                        ignore_status=True)
372                    result_counts = self._parse_test_results(
373                        log_file, failing_test=failing_test)
374                    self._archive_test_results(log_file)
375                    if result_counts:
376                        result = result_counts.keys()[0]
377                    else:
378                        result = 'Unknown'
379                except error.CmdTimeoutError:
380                    result = 'TestTimeout'
381                except error.CmdError:
382                    result = 'CommandFailed'
383                except Exception:
384                    result = 'UnexpectedError'
385                end_time = time.time()
386
387                if self._debug:
388                    # Collect debug info and save to json file.
389                    output_msgs = {
390                        'start_time': start_time,
391                        'end_time': end_time,
392                        'stdout': [],
393                        'stderr': [],
394                        'dmesg': []
395                    }
396                    logs = self._log_reader.get_logs()
397                    self._log_reader.set_start_by_current()
398                    output_msgs['dmesg'] = [
399                        msg for msg in logs.splitlines()
400                        if self._log_filter.match(msg)
401                    ]
402                    if run_result:
403                        output_msgs['stdout'] = run_result.stdout.splitlines()
404                        output_msgs['stderr'] = run_result.stderr.splitlines()
405                    with open(debug_file, 'w') as fd:
406                        json.dump(
407                            output_msgs,
408                            fd,
409                            indent=4,
410                            separators=(',', ' : '),
411                            sort_keys=True)
412
413            logging.info('Result: %s', result)
414            test_results[result] = test_results.get(result, 0) + 1
415
416        return test_results
417
418    def _run_tests_hasty(self, test_cases, failing_test=None):
419        """Runs tests as quickly as possible.
420
421        This function runs all the test cases, but does not isolate tests and
422        may take shortcuts/not run all tests to provide maximum coverage at
423        minumum runtime.
424
425        @param test_cases: List of dEQP test case strings.
426        @param failing_test: Test considered failed will append to it.
427
428        @return: dictionary of test results.
429        """
430        # TODO(ihf): It saves half the test time to use 32*32 but a few tests
431        # fail as they need surfaces larger than 200*200.
432        width = self._width
433        height = self._height
434        results = {}
435
436        # All tests combined less than 1h in hasty.
437        batch_timeout = min(3600, self._timeout * self._hasty_batch_size)
438        num_test_cases = len(test_cases)
439
440        # We are dividing the number of tests into several shards but run them
441        # in smaller batches. We start and end at multiples of batch_size
442        # boundaries.
443        shard_start = self._hasty_batch_size * (
444            (self._shard_number *
445             (num_test_cases / self._shard_count)) / self._hasty_batch_size)
446        shard_end = self._hasty_batch_size * (
447            ((self._shard_number + 1) *
448             (num_test_cases / self._shard_count)) / self._hasty_batch_size)
449        # The last shard will be slightly larger than the others. Extend it to
450        # cover all test cases avoiding rounding problems with the integer
451        # arithmetics done to compute shard_start and shard_end.
452        if self._shard_number + 1 == self._shard_count:
453            shard_end = num_test_cases
454
455        for batch in xrange(shard_start, shard_end, self._hasty_batch_size):
456            batch_to = min(batch + self._hasty_batch_size, shard_end)
457            batch_cases = '\n'.join(test_cases[batch:batch_to])
458            # This assumes all tests in the batch are kicked off via the same
459            # executable.
460            api = self._translate_name_to_api(test_cases[batch])
461            if not self._can_run(api):
462                logging.info('Skipping tests on %s: %s', self._gpu_type,
463                             batch_cases)
464            else:
465                executable = self._get_executable(api)
466                log_file = os.path.join(
467                    self._log_path, '%s_hasty_%d.log' % (self._filter, batch))
468                command = ('%s '
469                           '--deqp-stdin-caselist '
470                           '--deqp-surface-type=%s '
471                           '--deqp-gl-config-name=rgba8888d24s8ms0 '
472                           '--deqp-log-images=disable '
473                           '--deqp-visibility=hidden '
474                           '--deqp-watchdog=enable '
475                           '--deqp-surface-width=%d '
476                           '--deqp-surface-height=%d '
477                           '--deqp-log-filename=%s' %
478                           (executable, self._surface, width, height, log_file))
479
480                logging.info('Running tests %d...%d out of %d:\n%s\n%s',
481                             batch + 1, batch_to, num_test_cases, command,
482                             batch_cases)
483
484                # Must be in the executable directory when running for it to
485                # find it's test data files!
486                os.chdir(os.path.dirname(executable))
487
488                try:
489                    utils.run(
490                        command,
491                        env=self._env,
492                        timeout=batch_timeout,
493                        stderr_is_expected=False,
494                        ignore_status=False,
495                        stdin=batch_cases)
496                except Exception:
497                    pass
498                # We are trying to handle all errors by parsing the log file.
499                results = self._parse_test_results(log_file, results,
500                                                   failing_test)
501                self._archive_test_results(log_file)
502                logging.info(results)
503        return results
504
505    def _run_once(self, test_cases):
506        """Run dEQP test_cases in individual/hasty mode.
507
508        @param test_cases: test cases to run.
509        """
510        failing_test = []
511        if self._hasty:
512            logging.info('Running in hasty mode.')
513            test_results = self._run_tests_hasty(test_cases, failing_test)
514        else:
515            logging.info('Running each test individually.')
516            test_results = self._run_tests_individually(test_cases,
517                                                        failing_test)
518        return test_results, failing_test
519
520    def run_once(self, opts=None):
521        options = dict(
522            filter='',
523            test_names='',  # e.g., dEQP-GLES3.info.version,
524            # dEQP-GLES2.functional,
525            # dEQP-GLES3.accuracy.texture, etc.
526            test_names_file='',
527            timeout=self._timeout,
528            subset_to_run='Pass',  # Pass, Fail, Timeout, NotPass...
529            hasty='False',
530            shard_number='0',
531            shard_count='1',
532            debug='False',
533            perf_failure_description=None)
534        if opts is None:
535            opts = []
536        options.update(utils.args_to_dict(opts))
537        logging.info('Test Options: %s', options)
538
539        self._hasty = (options['hasty'] == 'True')
540        self._timeout = int(options['timeout'])
541        self._test_names_file = options['test_names_file']
542        self._test_names = options['test_names']
543        self._shard_number = int(options['shard_number'])
544        self._shard_count = int(options['shard_count'])
545        self._debug = (options['debug'] == 'True')
546        if not (self._test_names_file or self._test_names):
547            self._filter = options['filter']
548            if not self._filter:
549                raise error.TestFail('Failed: No dEQP test filter specified')
550        if options['perf_failure_description']:
551            self._test_failure_description = options['perf_failure_description']
552        else:
553            # Do not report failure if failure description is not specified.
554            self._test_failure_report_enable = False
555
556        # Some information to help post-process logs into blacklists later.
557        logging.info('ChromeOS BOARD = %s', self._board)
558        logging.info('ChromeOS CPU family = %s', self._cpu_type)
559        logging.info('ChromeOS GPU family = %s', self._gpu_type)
560
561        # Create a place to put detailed test output logs.
562        filter_name = self._filter or os.path.basename(self._test_names_file)
563        logging.info('dEQP test filter = %s', filter_name)
564        self._log_path = os.path.join(tempfile.gettempdir(),
565                                      '%s-logs' % filter_name)
566        shutil.rmtree(self._log_path, ignore_errors=True)
567        os.mkdir(self._log_path)
568
569        # Load either tests specified by test_names_file, test_names or filter.
570        test_cases = []
571        if self._test_names_file:
572            test_cases = self._get_test_cases_from_names_file()
573        elif self._test_names:
574            test_cases = []
575            for name in self._test_names.split(','):
576                test_cases.extend(self._get_test_cases(name, 'Pass'))
577        elif self._filter:
578            test_cases = self._get_test_cases(self._filter,
579                                              options['subset_to_run'])
580
581        if self._debug:
582            # LogReader works on /var/log/messages by default.
583            self._log_reader = cros_logging.LogReader()
584            self._log_reader.set_start_by_current()
585
586        # Assume all tests failed at the beginning.
587        for test_case in test_cases:
588            self.add_failures(test_case)
589
590        test_results, failing_test = self._run_once(test_cases)
591        # Rerun the test if we are in hasty mode.
592        if self._hasty and len(failing_test) > 0:
593            if len(failing_test) < sum(test_results.values()) * RERUN_RATIO:
594                logging.info('Because we are in hasty mode, we will rerun the '
595                             'failing tests one at a time')
596                rerun_results, failing_test = self._run_once(failing_test)
597                # Update failing test result from the test_results
598                for result in test_results:
599                    if result.lower() not in self.TEST_RESULT_FILTER:
600                        test_results[result] = 0
601                for result in rerun_results:
602                    test_results[result] = (
603                        test_results.get(result, 0) + rerun_results[result])
604            else:
605                logging.info('There are too many failing tests. It would '
606                             'take too long to rerun them. Giving up.')
607
608        # Update failing tests to the chrome perf dashboard records.
609        for test_case in test_cases:
610            if test_case not in failing_test:
611                self.remove_failures(test_case)
612
613        logging.info('Test results:')
614        logging.info(test_results)
615        logging.debug('Test Failed: %s', failing_test)
616        self.write_perf_keyval(test_results)
617
618        test_count = 0
619        test_failures = 0
620        test_passes = 0
621        test_skipped = 0
622        for result in test_results:
623            test_count += test_results[result]
624            if result.lower() in ['pass']:
625                test_passes += test_results[result]
626            if result.lower() not in self.TEST_RESULT_FILTER:
627                test_failures += test_results[result]
628            if result.lower() in ['skipped']:
629                test_skipped += test_results[result]
630        # The text "Completed all tests." is used by the process_log.py script
631        # and should always appear at the end of a completed test run.
632        logging.info(
633            'Completed all tests. Saw %d tests, %d passes and %d failures.',
634            test_count, test_passes, test_failures)
635
636        if self._filter and test_count == 0 and options[
637                'subset_to_run'] != 'NotPass':
638            logging.warning('No test cases found for filter: %s!', self._filter)
639
640        if test_failures:
641            raise error.TestFail('Failed: on %s %d/%d tests failed.' %
642                                 (self._gpu_type, test_failures, test_count))
643        if test_skipped > 0:
644            logging.info('On %s %d tests skipped, %d passes', self._gpu_type,
645                         test_skipped, test_passes)
646