1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import abc
6import datetime
7import difflib
8import functools
9import hashlib
10import logging
11import operator
12import os
13import re
14import sys
15import warnings
16
17import common
18
19from autotest_lib.frontend.afe.json_rpc import proxy
20from autotest_lib.client.common_lib import control_data
21from autotest_lib.client.common_lib import enum
22from autotest_lib.client.common_lib import error
23from autotest_lib.client.common_lib import global_config
24from autotest_lib.client.common_lib import priorities
25from autotest_lib.client.common_lib import time_utils
26from autotest_lib.client.common_lib import utils
27from autotest_lib.frontend.afe import model_attributes
28from autotest_lib.frontend.afe.json_rpc import proxy
29from autotest_lib.server.cros import provision
30from autotest_lib.server.cros.dynamic_suite import constants
31from autotest_lib.server.cros.dynamic_suite import control_file_getter
32from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
33from autotest_lib.server.cros.dynamic_suite import job_status
34from autotest_lib.server.cros.dynamic_suite import tools
35from autotest_lib.server.cros.dynamic_suite.job_status import Status
36
37try:
38    from chromite.lib import boolparse_lib
39    from chromite.lib import cros_logging as logging
40except ImportError:
41    print 'Unable to import chromite.'
42    print 'This script must be either:'
43    print '  - Be run in the chroot.'
44    print '  - (not yet supported) be run after running '
45    print '    ../utils/build_externals.py'
46
# List of suite names.
# NOTE(review): judging by the name, these are presumably the suites for
# which bugs get filed on failure -- the code that consumes this list is not
# visible in this part of the file, so confirm before relying on that.
_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta',
                    'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable',
                    'sanity', 'push_to_prod']
# Value of the `drone_installation_directory` option in the SCHEDULER
# section of the global config.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
        'SCHEDULER', 'drone_installation_directory')
# Boolean `enable_getting_controls_in_batch` option from the CROS section of
# the global config; False when the option is not set.  Read by
# _should_batch_with() to decide whether control files are fetched in batch.
ENABLE_CONTROLS_IN_BATCH = global_config.global_config.get_config_value(
        'CROS', 'enable_getting_controls_in_batch', type=bool, default=False)
54
class RetryHandler(object):
    """Bookkeeping for job retries within a suite.

    @var _retry_map: Retry history, keyed by afe job id.  Each value is a
            dictionary of the form
            {'state': <RetryHandler.States value>, 'retry_max': <int>}
            - state:
                Where the job stands with respect to being retried.
                NOT_ATTEMPTED:
                    Nothing has been done about the job yet.
                ATTEMPTED:
                    An attempt was made to schedule a retry job.  The
                    attempt itself may have failed (e.g. because of an
                    rpc error); that is not the same thing as the retry
                    job failing.  Scheduling a retry is attempted only
                    once per job.  For instance, for a test with
                    JOB_RETRIES=5 whose second retry job failed, an rpc
                    error while creating the third retry job means all
                    remaining retries are given up.
                RETRIED:
                    A retry job was scheduled successfully.
            - retry_max:
                How many more times the job may be retried, with the
                retries that already happened taken into account.
    @var _retry_level: Only results worse than this level may trigger a
            retry.
    @var _max_retries: Remaining retry budget for the whole suite.  No
            matter how often individual tests were retried, the number of
            retries in the suite as a whole cannot exceed this limit.
    """

    States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
                       start_value=1, step=1)

    def __init__(self, initial_jobs_to_tests, retry_level='WARN',
                 max_retries=None):
        """Set up the retry map from the initially scheduled jobs.

        @param initial_jobs_to_tests: Dictionary mapping a job id to a
                ControlData object; it should hold the jobs originally
                scheduled by the suite.  Only jobs whose test has
                job_retries > 0 get an entry in the retry map.
        @param retry_level: Only results worse than this level may trigger
                a retry.
        @param max_retries: Integer, maximum total number of retries
                allowed for the suite.  None (the default) means no limit.
        """
        self._retry_map = {}
        self._retry_level = retry_level
        if max_retries is None:
            # No suite-wide limit requested: use the largest int instead.
            max_retries = sys.maxint
        self._max_retries = max_retries
        for job_id, test in initial_jobs_to_tests.items():
            if not test.job_retries > 0:
                continue
            self._add_job(new_job_id=job_id, retry_max=test.job_retries)


    def _add_job(self, new_job_id, retry_max):
        """Register a newly created job in the retry map.

        @param new_job_id: The afe_job_id of the newly created job.
        @param retry_max: How many times the test may be retried should
                          this job fail.

        @raises ValueError if new_job_id already has a retry map entry.

        """
        if new_job_id in self._retry_map:
            raise ValueError('add_job called when job is already in retry map.')

        record = {'state': self.States.NOT_ATTEMPTED,
                  'retry_max': retry_max}
        self._retry_map[new_job_id] = record


    def _suite_max_reached(self):
        """Tell whether the suite-level retry budget is used up."""
        return 0 >= self._max_retries


    def add_retry(self, old_job_id, new_job_id):
        """Record that |new_job_id| is a retry of |old_job_id|.

        The old job is marked RETRIED, the new job is added to the retry
        map with one retry less than the old job had, and the suite-level
        budget is decremented.

        @param old_job_id: The afe_job_id of the job being retried.
        @param new_job_id: The afe_job_id of the retry job.

        @raises KeyError if old_job_id has no retry map entry.
        @raises ValueError if the old job was already retried, or an
                attempt to retry it was already made.

        """
        record = self._retry_map[old_job_id]
        if record['state'] == self.States.NOT_ATTEMPTED:
            record['state'] = self.States.RETRIED
            self._add_job(new_job_id=new_job_id,
                          retry_max=record['retry_max'] - 1)
            self._max_retries -= 1
            return
        raise ValueError(
                'We have already retried or attempted to retry job %d' %
                old_job_id)


    def set_attempted(self, job_id):
        """Move a job from NOT_ATTEMPTED to ATTEMPTED.

        @param job_id: afe_job_id of a job.

        @raises KeyError if job_id has no retry map entry.
        @raises ValueError if the job is not currently NOT_ATTEMPTED.

        """
        record = self._retry_map[job_id]
        state = record['state']
        if state == self.States.NOT_ATTEMPTED:
            record['state'] = self.States.ATTEMPTED
            return
        # Each job is supposed to be retried, or have a retry attempted,
        # at most once; anything else is a programming error.
        raise ValueError('Unexpected state transition: %s -> %s' %
                         (self.States.get_string(state),
                          self.States.get_string(self.States.ATTEMPTED)))


    def has_following_retry(self, result):
        """Tell whether a retry follows the job behind |result|.

        For a given job id (result.id) the cases are:
        - no retry map entry -> retry not required, nothing follows
        - retry map entry exists:
            - already retried -> a retry follows
            - not retried yet (delegated to _should_retry(result)):
                - retry_max == 0 -> this was the last retry job
                - retry_max > 0:
                   - attempted, but scheduling the retry failed because
                     of an rpc error -> no more retries
                   - not attempted -> a retry follows if the test failed

        @param result: A result, encapsulating the status of the job.

        @returns: True, if there will be a following retry.
                  False otherwise.

        """
        executed = result.test_executed
        if not executed:
            return executed
        if result.id not in self._retry_map:
            return False
        if self._retry_map[result.id]['state'] == self.States.RETRIED:
            return True
        return self._should_retry(result)


    def _should_retry(self, result):
        """Decide from its result whether a job should be retried.

        The job behind |result| is retried only when every one of these
        holds:
        a) The test was actually executed; a job aborted before it ever
           reached 'Running' is not retried.
        b) The job has an entry in the retry map, i.e. the test asks for
           retries.
        c) The suite-level retry limit has not been reached.
        d) The result is worse than |self._retry_level| (default 'WARN').
        e) No retry attempt was made yet, i.e. state == NOT_ATTEMPTED.
           Note that for a test with JOB_RETRIES=5, an rpc error on the
           second retry means all later retries are given up.
        f) The job still has retries left, i.e. retry_max > 0.

        @param result: A result, encapsulating the status of the job.

        @returns: True if we should retry the job.

        """
        executed = result.test_executed
        if not executed:
            return executed
        if result.id not in self._retry_map:
            return False
        if self._suite_max_reached():
            return False
        threshold = job_status.Status(self._retry_level, '', 'reason')
        worse = result.is_worse_than(threshold)
        if not worse:
            return worse
        record = self._retry_map[result.id]
        if record['state'] != self.States.NOT_ATTEMPTED:
            return False
        return record['retry_max'] > 0


    def get_retry_max(self, job_id):
        """Look up how many more times a job may be retried.

        @param job_id: afe_job_id of a job.

        @returns: An int, the number of times the job can still be
                  retried.
        @raises KeyError if job_id has no retry map entry.

        """
        record = self._retry_map[job_id]
        return record['retry_max']
254
255
class _SuiteChildJobCreator(object):
    """Creates the child (test) jobs of a suite through the AFE."""

    def __init__(
            self,
            tag,
            builds,
            board,
            afe=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=(),
            priority=priorities.Priority.DEFAULT,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
    ):
        """
        Constructor

        @param tag: string used to tag the jobs run in this suite.
        @param builds: the builds this suite runs on (a dict keyed by
                       version prefix).
        @param board: the board this suite runs on.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    When not given, a RetryingAFE is created.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param suite_job_id: Job id used as the parent id of all sub jobs.
                             Default: None
        @param ignore_deps: True to ignore each test's DEPENDENCIES
                            attribute and not apply dependency labels.
                            (Default:False)
        @param extra_deps: Strings to add as extra DEPENDENCIES to every
                           test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test
                                  code.
        @param job_keyvals: General job keyvals to put into the keyval file
                            that tko/parse uses later.
        """
        if not afe:
            afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                delay_sec=10,
                                                debug=False)
        self._afe = afe
        self._tag = tag
        self._builds = builds
        self._board = board
        self._suite_job_id = suite_job_id
        self._max_runtime_mins = max_runtime_mins
        self._timeout_mins = timeout_mins
        self._priority = priority
        self._ignore_deps = ignore_deps
        self._extra_deps = tuple(extra_deps)
        self._offload_failures_only = offload_failures_only
        self._test_source_build = test_source_build
        self._job_keyvals = job_keyvals


    @property
    def cros_build(self):
        """Return the CrOS build or the first build in the builds dict."""
        # TODO(ayatane): Note that the builds dict isn't ordered.  I'm not
        # sure what the implications of this are, but it's probably not a
        # good thing.
        fallback_build = self._builds.values()[0]
        return self._builds.get(provision.CROS_VERSION_PREFIX, fallback_build)


    def create_job(self, test, retry_for=None):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param retry_for: When the job being created retries an old job,
                          the afe_job_id of that old job; it is recorded in
                          the new job's keyvals.
        @returns: A frontend.Job object with an added test_name member.
                  test_name preserves the higher level TEST_NAME name of
                  the job.
        """
        # Overriding the suite priority on a system that runs several
        # suites sharing tests may lead to an unexpected scheduling order
        # that adds extra provision jobs, so the test's own priority is
        # only taken into account on moblab.
        if utils.is_moblab():
            job_priority = max(self._priority, test.priority)
        else:
            job_priority = self._priority

        if test.fast:
            reboot_before = model_attributes.RebootBefore.NEVER
        else:
            reboot_before = None

        source_build = self._test_source_build or self.cros_build
        job_name = tools.create_job_name(source_build, self._tag, test.name)
        meta_hosts = [self._board]*test.sync_count
        job_deps = self._create_job_deps(test)
        job_keyvals = self._create_keyvals_for_test_job(test, retry_for)

        job = self._afe.create_job(
            control_file=test.text,
            name=job_name,
            control_type=test.test_type.capitalize(),
            meta_hosts=meta_hosts,
            dependencies=job_deps,
            keyvals=job_keyvals,
            max_runtime_mins=self._max_runtime_mins,
            timeout_mins=self._timeout_mins,
            parent_job_id=self._suite_job_id,
            test_retry=test.retries,
            reboot_before=reboot_before,
            run_reset=not test.fast,
            priority=job_priority,
            synch_count=test.sync_count,
            require_ssp=test.require_ssp)

        job.test_name = test.name
        return job


    def _create_job_deps(self, test):
        """Build the dependency list for a test job.

        The test's own dependencies are left out when ignore_deps was
        requested; the suite's extra dependencies are always appended.

        @param test: ControlData object for a test to run.
        @returns: A list of dependency strings.
        """
        own_deps = [] if self._ignore_deps else list(test.dependencies)
        return own_deps + list(self._extra_deps)


    def _create_keyvals_for_test_job(self, test, retry_for=None):
        """Build the keyvals dict used when creating a test job.

        @param test: ControlData object for a test to run.
        @param retry_for: When the job being created retries an old job,
                          the afe_job_id of that old job; it is recorded in
                          the new job's keyvals.
        @returns: A keyvals dict for creating the test job.
        """
        cros_build = self.cros_build
        keyvals = {}
        keyvals[constants.JOB_BUILD_KEY] = cros_build
        keyvals[constants.JOB_SUITE_KEY] = self._tag
        keyvals[constants.JOB_EXPERIMENTAL_KEY] = test.experimental
        keyvals[constants.JOB_BUILDS_KEY] = self._builds

        # test_source_build goes into the job keyvals so the scheduler can
        # read the build name back from the database when it compiles the
        # autoserv command line; this avoids adding a field to afe_jobs.
        #
        # It is only added when it differs from the CrOS build or when the
        # job uses more than one build (e.g. both firmware and CrOS get
        # updated on the dut).  That keeps things backwards compatible, so
        # updated Autotest code can compile an autoserv command line that
        # runs in a SSP container using previous builds.
        test_source_build = self._test_source_build
        if test_source_build:
            multiple_builds = len(self._builds) > 1
            if cros_build != test_source_build or multiple_builds:
                keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = test_source_build
                for version_prefix, build_name in self._builds.iteritems():
                    if version_prefix == provision.FW_RW_VERSION_PREFIX:
                        keyvals[constants.FWRW_BUILD] = build_name
                    elif version_prefix == provision.FW_RO_VERSION_PREFIX:
                        keyvals[constants.FWRO_BUILD] = build_name

        # The suite job id lets the tko parser read the parent from the
        # keyval file.
        if self._suite_job_id:
            keyvals[constants.PARENT_JOB_ID] = self._suite_job_id
        # The old job's id is dropped into the new job's keyval file so the
        # tko parser can later work out the retry relationship and
        # invalidate the old job's results in the tko database.
        if retry_for:
            keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for
        if self._offload_failures_only:
            keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True
        if self._job_keyvals:
            # Only the whitelisted suite-level keyvals are inherited.
            keyvals.update(
                    (key, self._job_keyvals[key])
                    for key in constants.INHERITED_KEYVALS
                    if key in self._job_keyvals)
        return keyvals
437
438
def _get_cf_retriever(cf_getter, forgiving_parser=True, run_prod_code=False,
                      test_args=None):
    """Build the control file retriever that suits |cf_getter|.

    A _BatchControlFileRetriever is returned when _should_batch_with()
    approves of the getter, i.e. batch fetching is switched on in the
    config and the getter is a devserver getter.  That retriever calls
    cf_getter.get_suite_info() to obtain a dict of control files and
    their contents in one batch.

    In every other case a plain _ControlFileRetriever is returned.  With
    a file system ControlFileGetter this performs a full parse of the
    root directory associated with the getter, which is what happens
    when invoked from suite_preprocessor.  With a devserver getter the
    suite_name is looked up in a suite to control file map generated at
    build time and only the relevant control files are parsed; that
    lookup happens on the devserver, so both cases look the same from
    here.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param forgiving_parser: passed through to the retriever constructor.
    @param run_prod_code: passed through to the retriever constructor.
    @param test_args: passed through to the retriever constructor.
    @returns: a _ControlFileRetriever (or subclass) instance.
    """
    retriever_cls = (_BatchControlFileRetriever
                     if _should_batch_with(cf_getter)
                     else _ControlFileRetriever)
    return retriever_cls(cf_getter, forgiving_parser, run_prod_code, test_args)
462
463
def _should_batch_with(cf_getter):
    """Tell whether control files should be fetched in batch.

    Batching requires both the ENABLE_CONTROLS_IN_BATCH configuration
    option and a control file getter that is a DevServerGetter.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    """
    if not ENABLE_CONTROLS_IN_BATCH:
        return ENABLE_CONTROLS_IN_BATCH
    return isinstance(cf_getter, control_file_getter.DevServerGetter)
474
475
class _ControlFileRetriever(object):
    """Retrieves control files.

    This returns control data instances, unlike control file getters
    which simply return the control file text contents.
    """

    # Paths matching this pattern (anything below a first-level `deps` or
    # `profilers` directory) are dropped by _filter_cf_paths().  Compiled
    # once here instead of on every call.
    _EXCLUDED_PATH_RE = re.compile(r'[^/]+/(deps|profilers)/.+')

    def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False,
                 test_args=None):
        """Initialize instance.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param forgiving_parser: If False, will raise ControlVariableExceptions
                                 if any are encountered when parsing control
                                 files. Note that this can raise an exception
                                 for syntax errors in unrelated files, because
                                 we parse them before applying the predicate.
        @param run_prod_code: If true, the retrieved tests will run the test
                              code that lives in prod aka the test code
                              currently on the lab servers by disabling
                              SSP for the discovered tests.
        @param test_args: A dict of args to be seeded in test control file under
                          the name |args_dict|.
        """
        self._cf_getter = cf_getter
        self._forgiving_parser = forgiving_parser
        self._run_prod_code = run_prod_code
        self._test_args = test_args


    def retrieve(self, test_name):
        """Retrieve a test's control data.

        This ignores forgiving_parser because we cannot return a
        forgiving value.

        NOTE(review): unlike retrieve_for_suite(), this path does not seed
        test_args into the control file text -- confirm that is intended.

        @param test_name: Name of test to retrieve.

        @raises ControlVariableException: There is a syntax error in a
                                          control file.

        @returns a ControlData object
        """
        path = self._cf_getter.get_control_file_path(test_name)
        text = self._cf_getter.get_control_file_contents(path)
        return self._parse_cf_text(path, text)


    def retrieve_for_suite(self, suite_name=''):
        """Fetch and parse the control files of a suite.

        @param suite_name: If specified, this method will attempt to restrain
                           the search space to just this suite's control files.

        @raises ControlVariableException: If forgiving_parser is False and there
                                          is a syntax error in a control file.

        @returns a dictionary mapping control file path to ControlData
                 object, for every control file that parsed successfully.
        """
        control_file_texts = self._get_cf_texts_for_suite(suite_name)
        return self._parse_cf_text_many(control_file_texts)


    def _filter_cf_paths(self, paths):
        """Remove control file paths under deps/ and profilers/ directories.

        @param paths: Iterable of paths
        @returns: generator yielding the paths that are kept
        """
        is_excluded = self._EXCLUDED_PATH_RE.match
        return (path for path in paths if not is_excluded(path))


    def _get_cf_texts_for_suite(self, suite_name):
        """Get control file content for given suite.

        @param suite_name: If specified, this method will attempt to restrain
                           the search space to just this suite's control files.
        @returns: generator yielding (path, text) tuples
        """
        files = self._cf_getter.get_control_file_list(suite_name=suite_name)
        for path in self._filter_cf_paths(files):
            yield path, self._cf_getter.get_control_file_contents(path)


    def _parse_cf_text_many(self, control_file_texts):
        """Parse control file texts.

        Control files that fail to parse are skipped (and logged), except
        that a ControlVariableException is re-raised when forgiving_parser
        is False.

        @param control_file_texts: iterable of (path, text) pairs
        @returns: a dictionary mapping path to ControlData object

        @raises ControlVariableException: If forgiving_parser is False and
                                          a control file raises one while
                                          being parsed.
        """
        tests = {}
        for path, text in control_file_texts:
            # Seed test_args into the control file.
            if self._test_args:
                text = tools.inject_vars(self._test_args, text)
            try:
                found_test = self._parse_cf_text(path, text)
            except control_data.ControlVariableException as e:
                if not self._forgiving_parser:
                    # Re-raise with the offending path in the message.
                    msg = "Failed parsing %s\n%s" % (path, e)
                    raise control_data.ControlVariableException(msg)
                logging.warning("Skipping %s\n%s", path, e)
            except Exception as e:
                # Deliberately best-effort: one bad control file must not
                # prevent the remaining ones from being parsed.
                logging.error("Bad %s\n%s", path, e)
            else:
                tests[path] = found_test
        return tests


    def _parse_cf_text(self, path, text):
        """Parse control file text.

        This ignores forgiving_parser because we cannot return a
        forgiving value.

        @param path: path to control file
        @param text: control file text contents
        @returns: a ControlData object, with the raw text attached as
                  |text| and require_ssp forced to False when
                  run_prod_code is set.

        @raises ControlVariableException: There is a syntax error in a
                                          control file.
        """
        test = control_data.parse_control_string(
                text, raise_warnings=True, path=path)
        test.text = text
        if self._run_prod_code:
            test.require_ssp = False
        return test
608
609
class _BatchControlFileRetriever(_ControlFileRetriever):
    """Retriever that obtains a suite's control files in a single batch."""


    def _get_cf_texts_for_suite(self, suite_name):
        """Get control file content for given suite.

        The getter's get_suite_info() supplies a mapping of control file
        path to contents, so no per-file fetch is made here.

        @param suite_name: If specified, this method will attempt to restrain
                           the search space to just this suite's control files.
        @returns: generator yielding (path, text) tuples
        """
        contents_by_path = self._cf_getter.get_suite_info(
                suite_name=suite_name)
        for cf_path in self._filter_cf_paths(contents_by_path.keys()):
            yield cf_path, contents_by_path[cf_path]
626
627
def get_test_source_build(builds, **dargs):
    """Get the build of test code.

    Get the test source build from arguments. If parameter
    `test_source_build` is set and has a value, return its value. Otherwise
    return the ChromeOS build name if it exists, with a trailing
    provision.CHEETS_SUFFIX removed. If ChromeOS build is not specified
    either, raise SuiteArgumentException.

    @param builds: the builds on which we're running this suite. It's a
                   dictionary of version_prefix:build.
    @param **dargs: Any other Suite constructor parameters, as described
                    in Suite.__init__ docstring.

    @return: The build contains the test code.
    @raise: SuiteArgumentException if both test_source_build and ChromeOS
            build are not specified.

    """
    explicit_build = dargs.get('test_source_build')
    if explicit_build:
        return explicit_build

    cros_build = builds.get(provision.CROS_VERSION_PREFIX)
    suffix = provision.CHEETS_SUFFIX
    # cros_build is None when no CrOS build was given; guard the string
    # method so that case reaches the SuiteArgumentException below instead
    # of dying with AttributeError.
    if cros_build and cros_build.endswith(suffix):
        # Plain slicing: the suffix is a literal, not a regex.
        test_source_build = cros_build[:len(cros_build) - len(suffix)]
    else:
        test_source_build = cros_build

    if not test_source_build:
        raise error.SuiteArgumentException(
                'test_source_build must be specified if CrOS build is not '
                'specified.')
    return test_source_build
659
660
def list_all_suites(build, devserver, cf_getter=None):
    """
    Collect every suite name defined by the control files.

    All ControlData objects are parsed and the suite names found in their
    suite tags are gathered.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      using DevServerGetter.

    @return list of suites
    """
    getter = cf_getter
    if getter is None:
        getter = _create_ds_getter(build, devserver)

    def _accept_all(_test):
        """Predicate that keeps every test."""
        return True

    suite_names = set()
    for control_data_obj in find_and_parse_tests(getter, _accept_all):
        suite_names.update(control_data_obj.suite_tag_parts)
    return list(suite_names)
681
682
683def test_file_similarity_predicate(test_file_pattern):
684    """Returns predicate that gets the similarity based on a test's file
685    name pattern.
686
687    Builds a predicate that takes in a parsed control file (a ControlData)
688    and returns a tuple of (file path, ratio), where ratio is the
689    similarity between the test file name and the given test_file_pattern.
690
691    @param test_file_pattern: regular expression (string) to match against
692                              control file names.
693    @return a callable that takes a ControlData and and returns a tuple of
694            (file path, ratio), where ratio is the similarity between the
695            test file name and the given test_file_pattern.
696    """
697    return lambda t: ((None, 0) if not hasattr(t, 'path') else
698            (t.path, difflib.SequenceMatcher(a=t.path,
699                                             b=test_file_pattern).ratio()))
700
701
702def test_name_similarity_predicate(test_name):
703    """Returns predicate that matched based on a test's name.
704
705    Builds a predicate that takes in a parsed control file (a ControlData)
706    and returns a tuple of (test name, ratio), where ratio is the similarity
707    between the test name and the given test_name.
708
709    @param test_name: the test name to base the predicate on.
710    @return a callable that takes a ControlData and returns a tuple of
711            (test name, ratio), where ratio is the similarity between the
712            test name and the given test_name.
713    """
714    return lambda t: ((None, 0) if not hasattr(t, 'name') else
715            (t.name,
716             difflib.SequenceMatcher(a=t.name, b=test_name).ratio()))
717
718
def matches_attribute_expression_predicate(test_attr_boolstr):
    """Returns predicate that matches based on boolean expression of
    attributes.

    The returned callable takes a parsed control file (a ControlData) and
    hands its attributes, together with |test_attr_boolstr|, to
    boolparse_lib.BoolstrResult; the outcome tells whether the test's
    attributes satisfy the expression.

    @param test_attr_boolstr: boolean expression of the attributes to be
                              test, like 'system:all and interval:daily'.

    @return a callable that takes a ControlData and returns True if the test
            attributes satisfy the given boolean expression.
    """
    def _attributes_match(test):
        return boolparse_lib.BoolstrResult(test_attr_boolstr, test.attributes)

    return _attributes_match
735
736
737def test_file_matches_pattern_predicate(test_file_pattern):
738    """Returns predicate that matches based on a test's file name pattern.
739
740    Builds a predicate that takes in a parsed control file (a ControlData)
741    and returns True if the test's control file name matches the given
742    regular expression.
743
744    @param test_file_pattern: regular expression (string) to match against
745                              control file names.
746    @return a callable that takes a ControlData and and returns
747            True if control file name matches the pattern.
748    """
749    return lambda t: hasattr(t, 'path') and re.match(test_file_pattern,
750                                                     t.path)
751
752
753def test_name_matches_pattern_predicate(test_name_pattern):
754    """Returns predicate that matches based on a test's name pattern.
755
756    Builds a predicate that takes in a parsed control file (a ControlData)
757    and returns True if the test name matches the given regular expression.
758
759    @param test_name_pattern: regular expression (string) to match against
760                              test names.
761    @return a callable that takes a ControlData and returns
762            True if the name fields matches the pattern.
763    """
764    return lambda t: hasattr(t, 'name') and re.match(test_name_pattern,
765                                                     t.name)
766
767
768def test_name_equals_predicate(test_name):
769    """Returns predicate that matched based on a test's name.
770
771    Builds a predicate that takes in a parsed control file (a ControlData)
772    and returns True if the test name is equal to |test_name|.
773
774    @param test_name: the test name to base the predicate on.
775    @return a callable that takes a ControlData and looks for |test_name|
776            in that ControlData's name.
777    """
778    return lambda t: hasattr(t, 'name') and test_name == t.name
779
780
def name_in_tag_similarity_predicate(name):
    """Returns predicate that scores each suite of a control file against
    the given name.

    The returned callable takes a parsed control file (a ControlData) and
    returns one (suite name, ratio) tuple per entry of its
    `suite_tag_parts`, where ratio is the difflib.SequenceMatcher
    similarity between that suite and |name|. When the control file lists
    no suites, the single placeholder [(None, 0)] is returned instead.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and returns a list of tuples
            of (suite name, ratio), where suite name is each suite listed in
            the control file, and ratio is the similarity between each suite
            and the given name.
    """
    def _score_suites(test):
        """Scores every suite tag of |test| against the requested name."""
        scored = []
        for suite in test.suite_tag_parts:
            matcher = difflib.SequenceMatcher(a=suite, b=name)
            scored.append((suite, matcher.ratio()))
        if not scored:
            return [(None, 0)]
        return scored

    return _score_suites
799
800
def name_in_tag_predicate(name):
    """Returns predicate that checks whether a control file is in |name|.

    The returned callable takes a parsed control file (a ControlData) and
    returns True if |name| is one of the entries of its `suite_tag_parts`.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and looks for |name| in that
            ControlData object's suite member.
    """
    def _in_suite(test):
        """Checks whether |test| lists the requested suite."""
        return name in test.suite_tag_parts

    return _in_suite
812
813
def create_fs_getter(autotest_dir):
    """
    Create a control file getter that reads from the local file system.

    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # Currently hard-coded places to look for tests.
    directories = []
    for subpath in ('server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests'):
        directories.append(os.path.join(autotest_dir, subpath))
    return control_file_getter.FileSystemGetter(directories)
824
825
def _create_ds_getter(build, devserver):
    """
    Create a control file getter backed by a devserver.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @return a DevServerGetter instance created from |build| and |devserver|.
    """
    return control_file_getter.DevServerGetter(build, devserver)
833
834
def _non_experimental_tests_predicate(test_data):
    """Test predicate for non-experimental tests.

    @param test_data: a ControlData; only its `experimental` attribute is
                      read.
    @return True if |test_data| is not marked experimental.
    """
    is_experimental = test_data.experimental
    return not is_experimental
838
839
def find_and_parse_tests(cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False, test_args=None):
    """
    Function to scan through all tests and find eligible tests.

    Retrieves the parsed control files for |suite_name| through a retriever
    built from |cf_getter|, keeps the ones accepted by |predicate| (and,
    unless |add_experimental| is set, that are not experimental), and
    returns them ordered by their TIME setting.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.
    @param test_args: A dict of args to be seeded in test control file.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute. Results are sorted based
            on the TIME setting in control file, slowest test comes first.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    parsed = _get_cf_retriever(
            cf_getter,
            forgiving_parser=forgiving_parser,
            run_prod_code=run_prod_code,
            test_args=test_args).retrieve_for_suite(suite_name)
    logging.debug('Parsed %s control files.', len(parsed))

    # Experimental tests are filtered out by composing an extra predicate
    # on top of the caller's one.
    wanted = predicate
    if not add_experimental:
        wanted = _ComposedPredicate([predicate,
                                     _non_experimental_tests_predicate])

    def _time_index(test):
        """Maps the TIME setting of |test| to its sort index."""
        return control_data.ControlData.get_test_time_index(test.time)

    return sorted((test for test in parsed.itervalues() if wanted(test)),
                  key=_time_index, reverse=True)
891
892
def find_possible_tests(cf_getter, predicate, suite_name='', count=10):
    """
    Function to scan through all tests and find possible tests.

    Search through all tests based on given cf_getter and suite_name. Use
    the given predicate to calculate the similarity and return the top
    |count| matches.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return a tuple of (name, ratio)
           (or a list of such tuples) when run over a ControlData
           representation of a control file. `name` is the key to be
           compared, e.g., a suite name or test name. `ratio` is a value
           between [0,1] indicating the similarity of `name` and the value
           to be compared. A `name` of None marks a control file that had
           nothing to compare and is never suggested.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param count: Number of suggestions to return, default to 10.

    @return list of at most |count| names that are similar to the given
            test, sorted by match ratio, best match first.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    tests = _get_cf_retriever(cf_getter).retrieve_for_suite(suite_name)
    logging.debug('Parsed %s control files.', len(tests))
    similarities = {}
    for test in tests.itervalues():
        ratios = predicate(test)
        # Some predicates may return a list of tuples, e.g.,
        # name_in_tag_similarity_predicate. Convert all returns to a list.
        if not isinstance(ratios, list):
            ratios = [ratios]
        for name, ratio in ratios:
            # The similarity predicates emit (None, 0) as a placeholder for
            # control files lacking the compared field; None is not a name
            # and must not show up among the suggestions.
            if name is None:
                continue
            similarities[name] = ratio
    ranked = sorted(similarities.items(), key=operator.itemgetter(1),
                    reverse=True)
    return [name for name, _ in ranked[:count]]
930
931
def _deprecated_suite_method(func):
    """Decorator for deprecated Suite static methods.

    Wraps |func| so that every call first emits a warning naming the
    function, then forwards all arguments to |func| and returns its result.
    The wrapper keeps the metadata of |func| (via functools.wraps) and is
    returned as a staticmethod so it can be assigned as a class attribute.

    TODO(ayatane): This is used to decorate functions that are called as
    static methods on Suite.

    @param func: the module-level function to expose on Suite.
    @return a staticmethod wrapping |func|.
    """
    def _warn_then_call(*args, **kwargs):
        """Wraps |func| for warning."""
        message = ('Calling method "%s" from Suite is deprecated' %
                   func.__name__)
        warnings.warn(message)
        return func(*args, **kwargs)

    return staticmethod(functools.wraps(func)(_warn_then_call))
945
946
class _BaseSuite(object):
    """
    Base class for a suite of tests.

    Given the tests to run, can fire off jobs to run them, and then wait for
    results.

    @var tests: list of the tests (ControlData objects) in this suite.
    @var wait_for_results: the |wait_for_results| value given to the
                           constructor.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _builds: the builds on which we're running this suite.
    @var _results_dir: the directory keyvals are written to, if any.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
                         ControlData objects.
    @var _job_retry: a bool value indicating whether jobs should be retried
                     on failure.
    @var _max_retries: maximum number of retries at suite level.
    @var _retry_handler: a RetryHandler object, created by schedule() when
                         retries are enabled; None otherwise.
    @var _dependencies: tuple of the extra dependencies, the board, the
                        pool (if any) and the child dependencies.

    """


    def __init__(
            self,
            tests,
            tag,
            builds,
            board,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxint,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """Initialize instance.

        @param tests: Iterable of tests to run.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param file_bugs: True if failures should be handed to the result
                          reporter. (Default:False)
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled. The list is
                           copied, never modified.
        @param priority: Integer priority level.  Higher is more important.
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be retried
                          on failure. If True, the field 'JOB_RETRIES' in
                          control files will be respected. If False, do not
                          retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxint.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """

        self.tests = list(tests)
        self._tag = tag
        self._builds = builds
        self._results_dir = results_dir
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._jobs = []
        self._jobs_to_tests = {}

        self._file_bugs = file_bugs
        self._suite_job_id = suite_job_id
        self._job_retry = job_retry
        self._max_retries = max_retries
        # RetryHandler to be initialized in schedule()
        self._retry_handler = None
        self.wait_for_results = wait_for_results
        self._job_keyvals = job_keyvals
        if result_reporter is None:
            self._result_reporter = _EmailReporter(self)
        else:
            self._result_reporter = result_reporter

        # Work on a private copy: appending to the caller's list would leak
        # the board/pool labels into whatever else the caller uses it for
        # (e.g. building a second suite from the same list).
        extra_deps = list(extra_deps) if extra_deps is not None else []
        extra_deps.append(board)
        if pool:
            extra_deps.append(pool)
        extra_deps.extend(child_dependencies)
        self._dependencies = tuple(extra_deps)

        self._job_creator = _SuiteChildJobCreator(
            tag=tag,
            builds=builds,
            board=board,
            afe=afe,
            max_runtime_mins=max_runtime_mins,
            timeout_mins=timeout_mins,
            suite_job_id=suite_job_id,
            ignore_deps=ignore_deps,
            extra_deps=extra_deps,
            priority=priority,
            offload_failures_only=offload_failures_only,
            test_source_build=test_source_build,
            job_keyvals=job_keyvals,
        )


    def _schedule_test(self, record, test, retry_for=None):
        """Schedule a single test and return the job.

        Schedule a single test by creating a job, and then update relevant
        data structures that are used to keep track of all running jobs.

        Emits a TEST_NA status log entry if it failed to schedule the test due
        to NoEligibleHostException or a non-existent board label.

        Returns a frontend.Job object if the test is successfully scheduled.
        If scheduling failed due to NoEligibleHostException or a non-existent
        board label, returns None.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @param test: ControlData for a test to run.
        @param retry_for: If we are scheduling a test to retry an
                          old job, the afe_job_id of the old job
                          will be passed in as |retry_for|.

        @raises error.RPCException, proxy.JSONRPCException: re-raised from
                job creation, after marking the retry as attempted when
                |retry_for| is set.
        @raises proxy.ValidationError: re-raised when it is not about a
                non-existent board label.

        @returns: A frontend.Job object or None
        """
        msg = 'Scheduling %s' % test.name
        if retry_for:
            msg = msg + ', to retry afe job %d' % retry_for
        logging.debug(msg)
        begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
        try:
            job = self._job_creator.create_job(test, retry_for=retry_for)
        except (error.NoEligibleHostException, proxy.ValidationError) as e:
            if (isinstance(e, error.NoEligibleHostException)
                or (isinstance(e, proxy.ValidationError)
                    and _is_nonexistent_board_error(e))):
                # Treat a dependency on a non-existent board label the same as
                # a dependency on a board that exists, but for which there's no
                # hardware.
                logging.debug('%s not applicable for this board/pool. '
                              'Emitting TEST_NA.', test.name)
                Status('TEST_NA', test.name,
                       'Skipping:  test not supported on this board/pool.',
                       begin_time_str=begin_time_str).record_all(record)
                return None
            else:
                # Bare raise keeps the original traceback; 'raise e' would
                # restart it from this line.
                raise
        except (error.RPCException, proxy.JSONRPCException):
            if retry_for:
                # Mark that we've attempted to retry the old job.
                self._retry_handler.set_attempted(job_id=retry_for)
            raise
        else:
            self._jobs.append(job)
            self._jobs_to_tests[job.id] = test
            if retry_for:
                # A retry job was just created, record it.
                self._retry_handler.add_retry(
                        old_job_id=retry_for, new_job_id=job.id)
                retry_count = (test.job_retries -
                               self._retry_handler.get_retry_max(job.id))
                logging.debug('Job %d created to retry job %d. '
                              'Have retried for %d time(s)',
                              job.id, retry_for, retry_count)
            self._remember_job_keyval(job)
            return job


    def schedule(self, record):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.

        Any exception raised while scheduling is logged and recorded as a
        FAIL status for the suite instead of being propagated; the tests
        scheduled up to that point are still counted.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @returns: The number of tests that were scheduled.
        """
        scheduled_test_names = []
        logging.debug('Discovered %d tests.', len(self.tests))

        Status('INFO', 'Start %s' % self._tag).record_result(record)
        try:
            # Write job_keyvals into keyval file.
            if self._job_keyvals:
                utils.write_keyval(self._results_dir, self._job_keyvals)

            # TODO(crbug.com/730885): This is a hack to protect tests that are
            # not usually retried from getting hit by a provision error when run
            # as part of a suite. Remove this hack once provision is separated
            # out in its own suite.
            self._bump_up_test_retries(self.tests)
            for test in self.tests:
                scheduled_job = self._schedule_test(record, test)
                if scheduled_job is not None:
                    scheduled_test_names.append(test.name)

            # Write the num of scheduled tests and name of them to keyval file.
            logging.debug('Scheduled %d tests, writing the total to keyval.',
                          len(scheduled_test_names))
            utils.write_keyval(
                self._results_dir,
                self._make_scheduled_tests_keyvals(scheduled_test_names))
        except Exception:
            logging.exception('Exception while scheduling suite')
            Status('FAIL', self._tag,
                   'Exception while scheduling suite').record_result(record)

        if self._job_retry:
            self._retry_handler = RetryHandler(
                    initial_jobs_to_tests=self._jobs_to_tests,
                    max_retries=self._max_retries)
        return len(scheduled_test_names)


    def _bump_up_test_retries(self, tests):
        """Bump up individual test retries to match suite retry options.

        Does nothing unless retries are enabled for the suite.

        @param tests: Iterable of ControlData objects; their job_retries
                      attribute is updated in place.
        """
        if not self._job_retry:
            return

        for test in tests:
            # We do honor if a test insists on JOB_RETRIES = 0.
            if test.job_retries is None:
                logging.debug(
                        'Test %s did not request retries, but suite requires '
                        'retries. Bumping retries up to 1. '
                        '(See crbug.com/730885)',
                        test.name)
                test.job_retries = 1


    def _make_scheduled_tests_keyvals(self, scheduled_test_names):
        """Make a keyvals dict to write for scheduled test names.

        @param scheduled_test_names: A list of scheduled test name strings.

        @returns: A keyvals dict holding the number of names and the repr()
                  of the list of names.
        """
        return {
            constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names),
            constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names),
        }


    def _should_report(self, result):
        """
        Returns True if this failure requires to be reported.

        @param result: A result, encapsulating the status of the failed job.
        @return: True if we should report this failure.
        """
        return (self._file_bugs and result.test_executed and
                not result.is_testna() and
                result.is_worse_than(job_status.Status('GOOD', '', 'reason')))


    def _has_retry(self, result):
        """
        Return True if this result gets to retry.

        @param result: A result, encapsulating the status of the failed job.
        @return: bool
        """
        return (self._job_retry
                and self._retry_handler.has_following_retry(result))


    def wait(self, record):
        """
        Polls for the job statuses, using |record| to print status when each
        completes.

        Any exception raised while waiting is logged and recorded as a FAIL
        status for the suite instead of being propagated.

        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        waiter = job_status.JobResultWaiter(self._afe, self._tko)
        try:
            if self._suite_job_id:
                jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
            else:
                logging.warning('Unknown suite_job_id, falling back to less '
                                'efficient results_generator.')
                jobs = self._jobs
            waiter.add_jobs(jobs)
            for result in waiter.wait_for_results():
                self._handle_result(result=result, record=record, waiter=waiter)
                if self._finished_waiting():
                    break
        except Exception:  # pylint: disable=W0703
            logging.exception('Exception waiting for results')
            Status('FAIL', self._tag,
                   'Exception waiting for results').record_result(record)


    def _finished_waiting(self):
        """Return whether the suite is finished waiting for child jobs.

        This implementation always returns False, so wait() keeps going
        until the waiter runs out of results.
        """
        return False


    def _handle_result(self, result, record, waiter):
        """
        Handle a test job result.

        Records the result, schedules a retry when the retry handler asks
        for one, and reports the result unless a retry was scheduled for it.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param waiter: JobResultWaiter instance.
        """
        self._record_result(result, record)
        rescheduled = False
        if self._job_retry and self._retry_handler._should_retry(result):
            rescheduled = self._retry_result(result, record, waiter)
        # TODO (crbug.com/751428): If the suite times out before a retry could
        # finish, we would lose the chance to report errors from the original
        # job.
        if self._has_retry(result) and rescheduled:
            return

        if self._should_report(result):
            self._result_reporter.report(result)


    def _record_result(self, result, record):
        """
        Record a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        result.record_all(record)
        self._remember_job_keyval(result)


    def _retry_result(self, result, record, waiter):
        """
        Retry a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param waiter: JobResultWaiter instance.
        @returns: True if a job was scheduled for retry, False otherwise.
        """
        test = self._jobs_to_tests[result.id]
        try:
            # It only takes effect for CQ retriable job:
            #   1) in first try, test.fast=True.
            #   2) in second try, test will be run in normal mode, so reset
            #       test.fast=False.
            test.fast = False
            new_job = self._schedule_test(
                    record=record, test=test, retry_for=result.id)
        except (error.RPCException, proxy.JSONRPCException) as e:
            logging.error('Failed to schedule test: %s, Reason: %s',
                          test.name, e)
            return False
        else:
            # _schedule_test() returns None when the retry ended up as
            # TEST_NA; there is no job to wait for in that case.
            if new_job is not None:
                waiter.add_job(new_job)
            return bool(new_job)


    @property
    def _should_file_bugs(self):
        """Return whether bugs should be filed.

        @returns: bool
        """
        # File bug when failure is one of the _FILE_BUG_SUITES,
        # otherwise send an email to the owner and cc.
        return self._tag in _FILE_BUG_SUITES


    def abort(self):
        """
        Abort all scheduled test jobs.
        """
        if self._jobs:
            job_ids = [job.id for job in self._jobs]
            self._afe.run('abort_host_queue_entries', job__id__in=job_ids)


    def _remember_job_keyval(self, job):
        """
        Record provided job as a suite job keyval, for later referencing.

        The keyval maps the md5 hex digest of the test name to
        '<job id>-<owner>'. Nothing is written when there is no results
        directory or when any of the job attributes is empty.

        @param job: some representation of a job that has the attributes:
                    id, test_name, and owner
        """
        if self._results_dir and job.id and job.owner and job.test_name:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            logging.debug('Adding job keyval for %s=%s',
                          job.test_name, job_id_owner)
            utils.write_keyval(
                self._results_dir,
                {hashlib.md5(job.test_name).hexdigest(): job_id_owner})
1394
1395
class Suite(_BaseSuite):
    """
    A suite of tests selected by predicates over control file variables.

    The constructor searches a control file getter for the tests accepted by
    every supplied predicate and passes them, together with the scheduling
    options, to the _BaseSuite constructor.

    NOTE(review): none of the attributes below are assigned in this class
    body; they are presumably set up by _BaseSuite -- confirm there.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _builds: the builds on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
                         by ControlData objects.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    @var _retry: a bool value indicating whether jobs should be retried on
                 failure.
    @var _retry_handler: a RetryHandler object.

    """

    # TODO(ayatane): These module-level functions are re-exported as
    # attributes of Suite, each wrapped by _deprecated_suite_method, purely
    # for backward compatibility with callers that still use Suite.<name>.
    find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests)
    find_possible_tests = _deprecated_suite_method(find_possible_tests)
    create_fs_getter = _deprecated_suite_method(create_fs_getter)
    name_in_tag_predicate = _deprecated_suite_method(name_in_tag_predicate)
    name_in_tag_similarity_predicate = _deprecated_suite_method(
            name_in_tag_similarity_predicate)
    test_name_equals_predicate = _deprecated_suite_method(
            test_name_equals_predicate)
    test_name_matches_pattern_predicate = _deprecated_suite_method(
            test_name_matches_pattern_predicate)
    test_file_matches_pattern_predicate = _deprecated_suite_method(
            test_file_matches_pattern_predicate)
    matches_attribute_expression_predicate = _deprecated_suite_method(
            matches_attribute_expression_predicate)
    test_name_similarity_predicate = _deprecated_suite_method(
            test_name_similarity_predicate)
    test_file_similarity_predicate = _deprecated_suite_method(
            test_file_similarity_predicate)
    list_all_suites = _deprecated_suite_method(list_all_suites)
    get_test_source_build = _deprecated_suite_method(get_test_source_build)


    @classmethod
    def create_from_predicates(cls, predicates, builds, board, devserver,
                               cf_getter=None, name='ad_hoc_suite',
                               run_prod_code=False, **dargs):
        """
        Create a Suite from a list of predicate test filters.

        When no |cf_getter| is supplied one is chosen here: a file system
        getter rooted at _AUTOTEST_DIR if |run_prod_code| is true, otherwise
        a devserver getter for the test source build derived from |builds|
        and |dargs|.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param builds: the builds on which we're running this suite. It's a
                       dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. When None,
                          a getter is created as described above.
        @param name: name of suite, used as the suite tag. Defaults to
                     'ad_hoc_suite'.
        @param run_prod_code: If true, the suite will run the tests that
                              live in prod, aka the test code currently on
                              the lab servers.
        @param **dargs: Any other Suite constructor parameters, as described
                        in Suite.__init__ docstring. They are also passed to
                        get_test_source_build when a devserver getter has to
                        be created.
        @return a Suite instance.
        """
        if cf_getter is not None:
            getter = cf_getter
        elif run_prod_code:
            getter = create_fs_getter(_AUTOTEST_DIR)
        else:
            source_build = get_test_source_build(builds, **dargs)
            getter = _create_ds_getter(source_build, devserver)

        return cls(predicates, name, builds, board, getter, run_prod_code,
                   **dargs)


    @classmethod
    def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
                         **dargs):
        """
        Create a Suite whose single predicate is name_in_tag_predicate(name).

        When no |cf_getter| is supplied, a devserver getter is created for
        the test source build derived from |builds| and |dargs|.

        @param name: a value of the SUITE control file variable to search
                     for; also used as the suite tag.
        @param builds: the builds on which we're running this suite. It's a
                       dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          a devserver getter.
        @param **dargs: Any other Suite constructor parameters, as described
                        in Suite.__init__ docstring. They are also passed to
                        get_test_source_build when a getter has to be
                        created.
        @return a Suite instance.
        """
        if cf_getter is None:
            source_build = get_test_source_build(builds, **dargs)
            cf_getter = _create_ds_getter(source_build, devserver)

        suite_predicates = [name_in_tag_predicate(name)]
        return cls(suite_predicates, name, builds, board, cf_getter, **dargs)


    def __init__(
            self,
            predicates,
            tag,
            builds,
            board,
            cf_getter,
            run_prod_code=False,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            forgiving_parser=True,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxint,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            test_args=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """
        Constructor

        Finds the tests matching |predicates| through |cf_getter| and then
        initializes the _BaseSuite part of the object with them.
        |forgiving_parser|, |run_prod_code| and |test_args| are only used for
        finding tests; every other option is forwarded to _BaseSuite.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param run_prod_code: If true, the suite will run the test code that
                              lives in prod aka the test code currently on the
                              lab servers.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime. NOTE(review): the name and
                             the 24*60 default suggest minutes, but this
                             docstring used to say hours -- confirm.
        @param file_bugs: Forwarded unchanged to _BaseSuite. Presumably
                          enables reporting of test failures -- confirm.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param forgiving_parser: Forwarded unchanged to find_and_parse_tests.
                                 Presumably makes control file parsing
                                 tolerant of errors -- confirm.
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be
                          retried on failure. If True, the field
                          'JOB_RETRIES' in control files will be respected.
                          If False, do not retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxint.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """
        # The suite contains exactly the tests accepted by every predicate.
        matching_tests = find_and_parse_tests(
                cf_getter,
                _ComposedPredicate(predicates),
                tag,
                forgiving_parser=forgiving_parser,
                run_prod_code=run_prod_code,
                test_args=test_args,
        )
        base_suite_args = dict(
                tests=matching_tests,
                tag=tag,
                builds=builds,
                board=board,
                afe=afe,
                tko=tko,
                pool=pool,
                results_dir=results_dir,
                max_runtime_mins=max_runtime_mins,
                timeout_mins=timeout_mins,
                file_bugs=file_bugs,
                suite_job_id=suite_job_id,
                ignore_deps=ignore_deps,
                extra_deps=extra_deps,
                priority=priority,
                wait_for_results=wait_for_results,
                job_retry=job_retry,
                max_retries=max_retries,
                offload_failures_only=offload_failures_only,
                test_source_build=test_source_build,
                job_keyvals=job_keyvals,
                child_dependencies=child_dependencies,
                result_reporter=result_reporter,
        )
        super(Suite, self).__init__(**base_suite_args)
1634
1635
class ProvisionSuite(_BaseSuite):
    """
    A suite for provisioning DUTs.

    Provisioning is triggered by scheduling one dummy_Pass test per
    available host (up to a cap); the suite is considered done once enough
    of those tests have passed.
    """


    def __init__(
            self,
            tag,
            builds,
            board,
            devserver,
            num_required,
            num_max=float('inf'),
            cf_getter=None,
            run_prod_code=False,
            test_args=None,
            test_source_build=None,
            **kwargs):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param num_required: number of tests that must pass.  This is
                             capped by the number of tests that are run.
        @param num_max: max number of tests to make.  By default there
                        is no cap, a test is created for each available
                        host.
        @param cf_getter: a control_file_getter.ControlFileGetter.
        @param run_prod_code: Forwarded to _load_dummy_test, which uses it
                              to choose where the dummy test is loaded from.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param test_source_build: Build that contains the server-side test code.
        @param kwargs: Various keyword arguments passed to
                       _BaseSuite constructor.
        """
        super(ProvisionSuite, self).__init__(
                tests=[], tag=tag, builds=builds, board=board, **kwargs)
        # Defaults that stay in effect when the constructor bails out early.
        self._num_successful = 0
        self._num_required = 0
        self.tests = []

        # Keep only the dependencies that provisioning does not act on; these
        # are the labels used to look up candidate hosts.
        static_deps = []
        for dep in self._dependencies:
            if not provision.Provision.acts_on(dep):
                static_deps.append(dep)
        if 'pool:suites' in static_deps:
            logging.info('Provision suite is disabled on suites pool')
            return

        logging.debug('Looking for hosts matching %r', static_deps)
        hosts = self._afe.get_hosts(
                invalid=False, multiple_labels=static_deps)
        logging.debug('Found %d matching hosts for ProvisionSuite', len(hosts))

        available_hosts = []
        for host in hosts:
            if host.is_available():
                available_hosts.append(host)
        logging.debug('Found %d available hosts for ProvisionSuite',
                      len(available_hosts))

        dummy_test = _load_dummy_test(
                builds, devserver, cf_getter,
                run_prod_code, test_args, test_source_build)
        # One copy of the dummy test per available host, capped at num_max.
        num_tests = min(len(available_hosts), num_max)
        self.tests = [dummy_test] * num_tests
        logging.debug('Made %d tests for ProvisionSuite', len(self.tests))

        # Never require more passes than there are tests to run.
        self._num_required = min(num_required, len(self.tests))
        logging.debug('Expecting %d tests to pass for ProvisionSuite',
                      self._num_required)

    def _handle_result(self, result, record, waiter):
        """Handle a result as _BaseSuite does and count it if it is good.

        @param result: result object; only its is_good() method is used here.
        @param record: passed through to _BaseSuite._handle_result.
        @param waiter: passed through to _BaseSuite._handle_result.
        """
        super(ProvisionSuite, self)._handle_result(result, record, waiter)
        if not result.is_good():
            return
        self._num_successful += 1

    def _finished_waiting(self):
        """Return True once the required number of tests have passed."""
        return self._num_required <= self._num_successful
1713
1714
def _load_dummy_test(
        builds,
        devserver,
        cf_getter=None,
        run_prod_code=False,
        test_args=None,
        test_source_build=None):
    """
    Retrieve the 'dummy_Pass' test through a control file retriever.

    When no |cf_getter| is given one is created: a file system getter rooted
    at _AUTOTEST_DIR if |run_prod_code| is true, otherwise a devserver getter
    for the test source build.

    @param builds: the builds on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: a control_file_getter.ControlFileGetter.
    @param run_prod_code: selects the default getter as described above and
                          is also passed on to the retriever.
    @param test_args: A dict of args passed all the way to each individual
                      test that will be actually ran.
    @param test_source_build: Build that contains the server-side test code.
    @return whatever the retriever returns for 'dummy_Pass'.
    """
    if cf_getter is not None:
        getter = cf_getter
    elif run_prod_code:
        getter = create_fs_getter(_AUTOTEST_DIR)
    else:
        source_build = get_test_source_build(
                builds, test_source_build=test_source_build)
        getter = _create_ds_getter(source_build, devserver)

    cf_retriever = _get_cf_retriever(
            getter, run_prod_code=run_prod_code, test_args=test_args)
    return cf_retriever.retrieve('dummy_Pass')
1743
1744
1745class _ComposedPredicate(object):
1746    """Return the composition of the predicates.
1747
1748    Predicates are functions that take a test control data object and
1749    return True of that test is to be included.  The returned
1750    predicate's set is the intersection of all of the input predicates'
1751    sets (it returns True if all predicates return True).
1752    """
1753
1754    def __init__(self, predicates):
1755        """Initialize instance.
1756
1757        @param predicates: Iterable of predicates.
1758        """
1759        self._predicates = list(predicates)
1760
1761    def __repr__(self):
1762        return '{cls}({this._predicates!r})'.format(
1763            cls=type(self).__name__,
1764            this=self,
1765        )
1766
1767    def __call__(self, control_data_):
1768        return all(f(control_data_) for f in self._predicates)
1769
1770
1771def _is_nonexistent_board_error(e):
1772    """Return True if error is caused by nonexistent board label.
1773
1774    As of this writing, the particular case we want looks like this:
1775
1776     1) e.problem_keys is a dictionary
1777     2) e.problem_keys['meta_hosts'] exists as the only key
1778        in the dictionary.
1779     3) e.problem_keys['meta_hosts'] matches this pattern:
1780        "Label "board:.*" not found"
1781
1782    We check for conditions 1) and 2) on the
1783    theory that they're relatively immutable.
1784    We don't check condition 3) because it seems
1785    likely to be a maintenance burden, and for the
1786    times when we're wrong, being right shouldn't
1787    matter enough (we _hope_).
1788
1789    @param e: proxy.ValidationError instance
1790    @returns: boolean
1791    """
1792    return (isinstance(e.problem_keys, dict)
1793            and len(e.problem_keys) == 1
1794            and 'meta_hosts' in e.problem_keys)
1795
1796
class _ResultReporter(object):
    """Abstract base class for reporting test results.

    Usually, this is used to report test failures.

    Subclasses provide the actual reporting by implementing report().
    """

    # Python 2 style metaclass declaration; together with the
    # abc.abstractmethod decorator below it marks report() as abstract.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def report(self, result):
        """Report test result.

        This base implementation has no body beyond this docstring.

        @param result: Status instance for job.
        """
1811
1812
class _EmailReporter(_ResultReporter):
    """Result reporter that sends an email for each reported result."""

    # TODO(akeshet): Document what |bug_template| is actually supposed to come
    # from, and rename it to something unrelated to "bugs" which are no longer
    # relevant now that this is purely an email sender.
    def __init__(self, suite, bug_template=None):
        """Initialize instance.

        @param suite: suite object whose _tko, _job_creator, _tag and
                      _jobs_to_tests attributes are read when reporting.
        @param bug_template: default template dictionary; any false value
                             (including None) is replaced by an empty dict.
        """
        if not bug_template:
            bug_template = {}
        self._suite = suite
        self._bug_template = bug_template

    def _get_test_bug(self, result):
        """Get TestBug for the given result.

        @param result: Status instance for a test job.
        @returns: TestBug instance.
        """
        # The reporting modules depend on external packages (e.g. httplib2).
        # Importing them at module level would force everyone who imports
        # suite.py to build site-packages first, so the import is deferred
        # to the place where it is actually needed.
        from autotest_lib.server.cros.dynamic_suite import reporting

        suite = self._suite
        job_views = suite._tko.run('get_detailed_test_views',
                                   afe_job_id=result.id)
        chrome_version = utils.get_chrome_version(job_views)
        return reporting.TestBug(
                suite._job_creator.cros_build,
                chrome_version,
                suite._tag,
                result)

    def _get_bug_template(self, result):
        """Get the bug template to use for a test job.

        The reporter's default template is merged with the bug template of
        the test mapped to result.id in the suite's _jobs_to_tests.

        @param result: Status instance for job.
        @returns: the merged template on success; the default template's
                  bug_template if an AttributeError is raised (e.g. the test
                  control file defines no bug template); an empty dict if
                  merging raises InvalidBugTemplateException.
        """
        # See _get_test_bug for why this import is local: the reporting
        # modules need external packages that may not be built.
        from autotest_lib.server.cros.dynamic_suite import reporting_utils

        # Try to merge with bug template in test control file.
        default_template = reporting_utils.BugTemplate(self._bug_template)
        try:
            test = self._suite._jobs_to_tests[result.id]
            merged = default_template.finalize_bug_template(
                    test.bug_template)
        except AttributeError:
            # Test control file does not have bug template defined.
            return default_template.bug_template
        except reporting_utils.InvalidBugTemplateException as err:
            logging.error('Merging bug templates failed with '
                          'error: %s An empty bug template will '
                          'be used.', err)
            return {}
        return merged

    def report(self, result):
        """Send an email describing the given result.

        @param result: Status instance for job.
        """
        # Deferred import: the reporting modules depend on external packages
        # (e.g. httplib2) that are only available once site-packages has
        # been built, and this is the only place the module is needed.
        from autotest_lib.server.cros.dynamic_suite import reporting

        test_bug = self._get_test_bug(result)
        bug_template = self._get_bug_template(result)
        reporting.send_email(test_bug, bug_template)
1888