1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import abc
6import datetime
7import difflib
8import functools
9import hashlib
10import logging
11import operator
12import os
13import re
14import sys
15import warnings
16
17import common
18
19from autotest_lib.frontend.afe.json_rpc import proxy
20from autotest_lib.client.common_lib import enum
21from autotest_lib.client.common_lib import error
22from autotest_lib.client.common_lib import global_config
23from autotest_lib.client.common_lib import priorities
24from autotest_lib.client.common_lib import time_utils
25from autotest_lib.client.common_lib import utils
26from autotest_lib.frontend.afe import model_attributes
27from autotest_lib.frontend.afe.json_rpc import proxy
28from autotest_lib.server.cros import provision
29from autotest_lib.server.cros.dynamic_suite import constants
30from autotest_lib.server.cros.dynamic_suite import control_file_getter
31from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
32from autotest_lib.server.cros.dynamic_suite import job_status
33from autotest_lib.server.cros.dynamic_suite import suite_common
34from autotest_lib.server.cros.dynamic_suite import tools
35from autotest_lib.server.cros.dynamic_suite.job_status import Status
36
37try:
38    from chromite.lib import boolparse_lib
39    from chromite.lib import cros_logging as logging
40except ImportError:
41    print 'Unable to import chromite.'
42    print 'This script must be either:'
43    print '  - Be run in the chroot.'
44    print '  - (not yet supported) be run after running '
45    print '    ../utils/build_externals.py'
46
# Hard-coded list of suite names.
# NOTE(review): judging by the name, presumably the suites for which bugs are
# filed on test failure -- confirm against the code that reads this list.
_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta',
                    'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable',
                    'sanity', 'push_to_prod']
# Value of the 'drone_installation_directory' option in the SCHEDULER section
# of the global config, read once when this module is imported.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
        'SCHEDULER', 'drone_installation_directory')
52
53
class RetryHandler(object):
    """Maintain retry information.

    @var _retry_map: A dictionary that stores retry history.
            The key is afe job id. The value is a dictionary.
            {job_id: {'state':RetryHandler.States, 'retry_max':int}}
            - state:
                The retry state of a job.
                NOT_ATTEMPTED:
                    We haven't done anything about the job.
                ATTEMPTED:
                    We've made an attempt to schedule a retry job. The
                    scheduling may or may not be successful, e.g.
                    it might encounter an rpc error. Note failure
                    in scheduling a retry is different from a retry job failure.
                    For each job, we only attempt to schedule a retry once.
                    For example, assume we have a test with JOB_RETRIES=5 and
                    its second retry job failed. When we attempt to create
                    a third retry job to retry the second, we hit an rpc
                    error. In such case, we will give up on all following
                    retries.
                RETRIED:
                    A retry job has already been successfully
                    scheduled.
            - retry_max:
                The maximum number of times the job can still
                be retried, taking into account retries
                that have occurred.
    @var _retry_level: A retry might be triggered only if the result
            is worse than the level.
    @var _max_retries: Remaining retry budget at suite level.
                     Regardless how many times each individual test
                     has been retried, the total number of retries happening in
                     the suite can't exceed the initial value. The counter is
                     decremented every time a retry is recorded.
    """

    States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
                       start_value=1, step=1)

    def __init__(self, initial_jobs_to_tests, retry_level='WARN',
                 max_retries=None):
        """Initialize RetryHandler.

        @param initial_jobs_to_tests: A dictionary that maps a job id to
                a ControlData object. This dictionary should contain
                jobs that are originally scheduled by the suite.
        @param retry_level: A retry might be triggered only if the result is
                worse than the level.
        @param max_retries: Integer, maximum total retries allowed
                                  for the suite. Default to None, no max.
        """
        self._retry_map = {}
        self._retry_level = retry_level
        # sys.maxsize acts as the "no limit" sentinel. Unlike sys.maxint it
        # is available on both Python 2.6+ and Python 3.
        self._max_retries = (max_retries
                             if max_retries is not None else sys.maxsize)
        for job_id, test in initial_jobs_to_tests.items():
            if test.job_retries > 0:
                self._add_job(new_job_id=job_id,
                              retry_max=test.job_retries)
            else:
                logging.debug("Test %s has no retries", test.name)


    def _add_job(self, new_job_id, retry_max):
        """Add a newly-created job to the retry map.

        @param new_job_id: The afe_job_id of a newly created job.
        @param retry_max: The maximum of times that we could retry
                          the test if the job fails.

        @raises ValueError if new_job_id is already in retry map.

        """
        if new_job_id in self._retry_map:
            raise ValueError('add_job called when job is already in retry map.')

        self._retry_map[new_job_id] = {
                'state': self.States.NOT_ATTEMPTED,
                'retry_max': retry_max}


    def _suite_max_reached(self):
        """Return whether maximum retry limit for a suite has been reached."""
        return self._max_retries <= 0


    def add_retry(self, old_job_id, new_job_id):
        """Record a retry.

        Update retry map with the retry information. The retry map is only
        modified when the whole update succeeds.

        @param old_job_id: The afe_job_id of the job that is retried.
        @param new_job_id: The afe_job_id of the retry job.

        @raises KeyError if old_job_id isn't in the retry map.
        @raises ValueError if we have already retried or made an attempt
                to retry the old job, or if new_job_id is already in the
                retry map.

        """
        old_record = self._retry_map[old_job_id]
        if old_record['state'] != self.States.NOT_ATTEMPTED:
            raise ValueError(
                    'We have already retried or attempted to retry job %d' %
                    old_job_id)
        # Register the new job before touching the old record: _add_job
        # raises ValueError for a duplicate id, and in that case the old job
        # must not be left marked as RETRIED.
        self._add_job(new_job_id=new_job_id,
                      retry_max=old_record['retry_max'] - 1)
        old_record['state'] = self.States.RETRIED
        # Each recorded retry consumes one unit of the suite-level budget.
        self._max_retries -= 1


    def set_attempted(self, job_id):
        """Set the state of the job to ATTEMPTED.

        @param job_id: afe_job_id of a job.

        @raises KeyError if job_id isn't in the retry map.
        @raises ValueError if the current state is not NOT_ATTEMPTED.

        """
        record = self._retry_map[job_id]
        current_state = record['state']
        if current_state != self.States.NOT_ATTEMPTED:
            # We are supposed to retry or attempt to retry each job
            # only once. Raise an error if this is not the case.
            raise ValueError('Unexpected state transition: %s -> %s' %
                             (self.States.get_string(current_state),
                              self.States.get_string(self.States.ATTEMPTED)))
        record['state'] = self.States.ATTEMPTED


    def has_following_retry(self, result):
        """Check whether there will be a following retry.

        We have the following cases for a given job id (result.id),
        - no retry map entry -> retry not required, no following retry
        - has retry map entry:
            - already retried -> has following retry
            - has not retried
                (this branch can be handled by checking should_retry(result))
                - retry_max == 0 --> the last retry job, no more retry
                - retry_max > 0
                   - attempted, but has failed in scheduling a
                     following retry due to rpc error  --> no more retry
                   - has not attempted --> has following retry if test failed.

        @param result: A result, encapsulating the status of the job.

        @returns: True, if there will be a following retry.
                  False otherwise.

        """
        return (result.test_executed
                and result.id in self._retry_map
                and (self._retry_map[result.id]['state'] == self.States.RETRIED
                     or self._should_retry(result)))


    def _should_retry(self, result):
        """Check whether we should retry a job based on its result.

        We will retry the job that corresponds to the result
        when all of the following are true.
        a) The test was actually executed, meaning that if
           a job was aborted before it could ever reach the state
           of 'Running', the job will not be retried.
        b) The result is worse than |self._retry_level| which
           defaults to 'WARN'.
        c) The test requires retry, i.e. the job has an entry in the retry map.
        d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED
           Note that if a test has JOB_RETRIES=5, and the second time
           it was retried it hit an rpc error, we will give up on
           all following retries.
        e) The job has not reached its retry max, i.e. retry_max > 0
        f) The suite-level retry limit has not been reached.

        @param result: A result, encapsulating the status of the job.

        @returns: True if we should retry the job.

        """
        return (
            result.test_executed
            and result.id in self._retry_map
            and not self._suite_max_reached()
            and result.is_worse_than(
                job_status.Status(self._retry_level, '', 'reason'))
            and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED
            and self._retry_map[result.id]['retry_max'] > 0
        )

    def _should_retry_local_job(self, job_id):
        """Check whether we should retry a job based on information available
        for a local job without a Result object.

        We will retry the job when all of the following are true.
        a) The suite-level retry limit has not been reached.
        b) The test requires retry, i.e. the job has an entry in the retry map.
        c) We haven't made any retry attempt yet for this job, i.e.
           state == NOT_ATTEMPTED
           If the job is aborted,  we will give up on all following retries,
           regardless of max_retries.
        d) The job has not reached its retry max, i.e. retry_max > 0

        @param job_id: the id for the job, to look up relevant information.

        @returns: True if we should retry the job.

        """
        if self._suite_max_reached():
            logging.debug('suite max_retries reached, not retrying.')
            return False
        if job_id not in self._retry_map:
            logging.debug('job_id not in retry map, not retrying.')
            return False
        record = self._retry_map[job_id]
        if record['state'] != self.States.NOT_ATTEMPTED:
            # Log the state by name rather than by its numeric enum value.
            logging.debug("job state was %s not 'Not Attempted', not retrying",
                          self.States.get_string(record['state']))
            return False
        if record['retry_max'] <= 0:
            logging.debug('test-level retries exhausted, not retrying')
            return False
        return True


    def job_present(self, job_id):
        """Check whether a job id is present in the retry map.

        @param job_id: afe_job_id of a job.

        @returns: True if the job is present, False if not.
        """
        # Records are always non-empty dicts (see _add_job), so plain
        # membership is equivalent to checking the truthiness of the record.
        return job_id in self._retry_map



    def get_retry_max(self, job_id):
        """Get the maximum times the job can still be retried.

        @param job_id: afe_job_id of a job.

        @returns: An int, representing the maximum times the job can still be
                  retried.
        @raises KeyError if job_id isn't in the retry map.

        """
        return self._retry_map[job_id]['retry_max']
299
300
class _SuiteChildJobCreator(object):
    """Schedules the individual test jobs that belong to one suite."""

    def __init__(
            self,
            tag,
            builds,
            board,
            afe=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=(),
            priority=priorities.Priority.DEFAULT,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
    ):
        """
        Remember the settings shared by every job created for the suite.

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    When not given, a RetryingAFE is created.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        """
        if not afe:
            # No usable AFE was supplied; fall back to a retrying client.
            afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                delay_sec=10,
                                                debug=False)
        self._afe = afe

        # What is being tested.
        self._tag = tag
        self._builds = builds
        self._board = board
        self._test_source_build = test_source_build

        # How the jobs are scheduled.
        self._suite_job_id = suite_job_id
        self._priority = priority
        self._max_runtime_mins = max_runtime_mins
        self._timeout_mins = timeout_mins

        # Dependency handling; extra deps are frozen into a tuple.
        self._ignore_deps = ignore_deps
        self._extra_deps = tuple(extra_deps)

        # Keyval related settings.
        self._offload_failures_only = offload_failures_only
        self._job_keyvals = job_keyvals


    @property
    def cros_build(self):
        """Return the CrOS build or the first build in the builds dict."""
        # TODO(ayatane): Note that the builds dict isn't ordered.  I'm not
        # sure what the implications of this are, but it's probably not a
        # good thing.
        first_build = self._builds.values()[0]
        return self._builds.get(provision.CROS_VERSION_PREFIX, first_build)


    def create_job(self, test, retry_for=None):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A frontend.Job object with an added test_name member.
                  test_name is used to preserve the higher level TEST_NAME
                  name of the job.
        """
        # For a system running multiple suites which share tests, the priority
        # overridden may lead to unexpected scheduling order that adds extra
        # provision jobs. Hence the test's own priority is only honoured on
        # moblab.
        if utils.is_moblab():
            job_priority = max(self._priority, test.priority)
        else:
            job_priority = self._priority

        # Fast tests never reboot beforehand and also skip the reset step.
        if test.fast:
            reboot_before = model_attributes.RebootBefore.NEVER
        else:
            reboot_before = None

        job_name = tools.create_job_name(
                self._test_source_build or self.cros_build,
                self._tag,
                test.name)
        control_type = test.test_type.capitalize()
        meta_hosts = [self._board] * test.sync_count
        job_deps = self._create_job_deps(test)
        job_keyvals = self._create_keyvals_for_test_job(test, retry_for)

        new_job = self._afe.create_job(
            control_file=test.text,
            name=job_name,
            control_type=control_type,
            meta_hosts=meta_hosts,
            dependencies=job_deps,
            keyvals=job_keyvals,
            max_runtime_mins=self._max_runtime_mins,
            timeout_mins=self._timeout_mins,
            parent_job_id=self._suite_job_id,
            reboot_before=reboot_before,
            run_reset=not test.fast,
            priority=job_priority,
            synch_count=test.sync_count,
            require_ssp=test.require_ssp)

        new_job.test_name = test.name
        return new_job


    def _create_job_deps(self, test):
        """Create job deps list for a test job.

        The test's own dependencies are dropped when ignore_deps was set;
        the extra dependencies are always appended.

        @param test: ControlData object for a test to run.
        @returns: A list of dependency strings.
        """
        own_deps = [] if self._ignore_deps else list(test.dependencies)
        return own_deps + list(self._extra_deps)


    def _create_keyvals_for_test_job(self, test, retry_for=None):
        """Create keyvals dict for creating a test job.

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A keyvals dict for creating the test job.
        """
        cros_build = self.cros_build
        source_build = self._test_source_build

        keyvals = {}
        keyvals[constants.JOB_BUILD_KEY] = cros_build
        keyvals[constants.JOB_SUITE_KEY] = self._tag
        keyvals[constants.JOB_EXPERIMENTAL_KEY] = test.experimental
        keyvals[constants.JOB_BUILDS_KEY] = self._builds

        # test_source_build is saved to job_keyvals so scheduler can retrieve
        # the build name from database when compiling autoserv commandline.
        # This avoid a database change to add a new field in afe_jobs.
        #
        # Only add `test_source_build` to job keyvals if the build is different
        # from the CrOS build or the job uses more than one build, e.g., both
        # firmware and CrOS will be updated in the dut.
        # This is for backwards compatibility, so the update Autotest code can
        # compile an autoserv command line to run in a SSP container using
        # previous builds.
        if source_build and (len(self._builds) > 1 or
                             cros_build != source_build):
            keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = source_build
            # Map each firmware provision prefix to the keyval it populates.
            firmware_keys = {
                provision.FW_RW_VERSION_PREFIX: constants.FWRW_BUILD,
                provision.FW_RO_VERSION_PREFIX: constants.FWRO_BUILD,
            }
            for prefix, build in self._builds.iteritems():
                firmware_key = firmware_keys.get(prefix)
                if firmware_key is not None:
                    keyvals[firmware_key] = build

        # Add suite job id to keyvals so tko parser can read it from keyval
        # file.
        if self._suite_job_id:
            keyvals[constants.PARENT_JOB_ID] = self._suite_job_id

        # We drop the old job's id in the new job's keyval file so that
        # later our tko parser can figure out the retry relationship and
        # invalidate the results of the old job in tko database.
        if retry_for:
            keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for

        if self._offload_failures_only:
            keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True

        # Copy over only the inherited keyvals the suite job actually carries.
        if self._job_keyvals:
            keyvals.update(
                    (key, self._job_keyvals[key])
                    for key in constants.INHERITED_KEYVALS
                    if key in self._job_keyvals)
        return keyvals
481
482
class _ControlFileRetriever(object):
    """Fetches control data for tests and suites.

    Unlike the control file getters, which hand back the raw text of a
    control file, this class returns control data instances.
    """

    def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False,
                 test_args=None):
        """Initialize instance.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param forgiving_parser: If False, will raise ControlVariableExceptions
                                 if any are encountered when parsing control
                                 files. Note that this can raise an exception
                                 for syntax errors in unrelated files, because
                                 we parse them before applying the predicate.
        @param run_prod_code: If true, the retrieved tests will run the test
                              code that lives in prod aka the test code
                              currently on the lab servers by disabling
                              SSP for the discovered tests.
        @param test_args: A dict of args to be seeded in test control file under
                          the name |args_dict|.
        """
        self._test_args = test_args
        self._run_prod_code = run_prod_code
        self._forgiving_parser = forgiving_parser
        self._cf_getter = cf_getter


    def retrieve_for_test(self, test_name):
        """Retrieve the control data of a single test.

        forgiving_parser is not consulted here because there is no
        forgiving value that could be returned.

        @param test_name: Name of test to retrieve.

        @raises ControlVariableException: There is a syntax error in a
                                          control file.

        @returns a ControlData object
        """
        control_data = suite_common.retrieve_control_data_for_test(
                self._cf_getter, test_name)
        return control_data


    def retrieve_for_suite(self, suite_name=''):
        """Scan through all tests and collect their control data.

        @param suite_name: If specified, this method will attempt to restrain
                           the search space to just this suite's control files.

        @raises ControlVariableException: If forgiving_parser is False and there
                                          is a syntax error in a control file.

        @returns a dictionary of ControlData objects based on the given
                 parameters.
        """
        tests = suite_common.retrieve_for_suite(
                self._cf_getter, suite_name, self._forgiving_parser,
                self._test_args)
        if not self._run_prod_code:
            return tests

        # Running prod code: switch SSP off on every discovered test.
        for control_data in tests.itervalues():
            control_data.require_ssp = False
        return tests
551
552
def list_all_suites(build, devserver, cf_getter=None):
    """
    Collect the names of all suites mentioned by any control file.

    Every control file found through the getter is parsed and the entries
    of its suite_tag_parts are gathered, without duplicates.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      using DevServerGetter.

    @return list of suites
    """
    getter = cf_getter
    if getter is None:
        getter = _create_ds_getter(build, devserver)

    def _accept_all(t):
        """Predicate that keeps every control file."""
        return True

    suite_names = set()
    for control_data in find_and_parse_tests(getter, _accept_all):
        suite_names.update(control_data.suite_tag_parts)
    return list(suite_names)
573
574
575def test_file_similarity_predicate(test_file_pattern):
576    """Returns predicate that gets the similarity based on a test's file
577    name pattern.
578
579    Builds a predicate that takes in a parsed control file (a ControlData)
580    and returns a tuple of (file path, ratio), where ratio is the
581    similarity between the test file name and the given test_file_pattern.
582
583    @param test_file_pattern: regular expression (string) to match against
584                              control file names.
585    @return a callable that takes a ControlData and and returns a tuple of
586            (file path, ratio), where ratio is the similarity between the
587            test file name and the given test_file_pattern.
588    """
589    return lambda t: ((None, 0) if not hasattr(t, 'path') else
590            (t.path, difflib.SequenceMatcher(a=t.path,
591                                             b=test_file_pattern).ratio()))
592
593
594def test_name_similarity_predicate(test_name):
595    """Returns predicate that matched based on a test's name.
596
597    Builds a predicate that takes in a parsed control file (a ControlData)
598    and returns a tuple of (test name, ratio), where ratio is the similarity
599    between the test name and the given test_name.
600
601    @param test_name: the test name to base the predicate on.
602    @return a callable that takes a ControlData and returns a tuple of
603            (test name, ratio), where ratio is the similarity between the
604            test name and the given test_name.
605    """
606    return lambda t: ((None, 0) if not hasattr(t, 'name') else
607            (t.name,
608             difflib.SequenceMatcher(a=t.name, b=test_name).ratio()))
609
610
def matches_attribute_expression_predicate(test_attr_boolstr):
    """Returns predicate that matches based on boolean expression of
    attributes.

    The returned callable takes a parsed control file (a ControlData) and
    returns the result of boolparse_lib.BoolstrResult for
    |test_attr_boolstr| and the test's attributes.

    @param test_attr_boolstr: boolean expression of the attributes to be
                              tested, like 'system:all and interval:daily'.

    @return a callable that takes a ControlData and returns True if the test
            attributes satisfy the given boolean expression.
    """
    def _attributes_match(t):
        return boolparse_lib.BoolstrResult(test_attr_boolstr, t.attributes)

    return _attributes_match
627
628
629def test_file_matches_pattern_predicate(test_file_pattern):
630    """Returns predicate that matches based on a test's file name pattern.
631
632    Builds a predicate that takes in a parsed control file (a ControlData)
633    and returns True if the test's control file name matches the given
634    regular expression.
635
636    @param test_file_pattern: regular expression (string) to match against
637                              control file names.
638    @return a callable that takes a ControlData and and returns
639            True if control file name matches the pattern.
640    """
641    return lambda t: hasattr(t, 'path') and re.match(test_file_pattern,
642                                                     t.path)
643
644
645def test_name_matches_pattern_predicate(test_name_pattern):
646    """Returns predicate that matches based on a test's name pattern.
647
648    Builds a predicate that takes in a parsed control file (a ControlData)
649    and returns True if the test name matches the given regular expression.
650
651    @param test_name_pattern: regular expression (string) to match against
652                              test names.
653    @return a callable that takes a ControlData and returns
654            True if the name fields matches the pattern.
655    """
656    return lambda t: hasattr(t, 'name') and re.match(test_name_pattern,
657                                                     t.name)
658
659
660def test_name_equals_predicate(test_name):
661    """Returns predicate that matched based on a test's name.
662
663    Builds a predicate that takes in a parsed control file (a ControlData)
664    and returns True if the test name is equal to |test_name|.
665
666    @param test_name: the test name to base the predicate on.
667    @return a callable that takes a ControlData and looks for |test_name|
668            in that ControlData's name.
669    """
670    return lambda t: hasattr(t, 'name') and test_name == t.name
671
672
def name_in_tag_similarity_predicate(name):
    """Returns predicate that scores a control file's suites against |name|.

    The returned callable takes a parsed control file (a ControlData) and
    returns a list of tuples of (suite name, ratio), one per entry of the
    control file's suite_tag_parts, where ratio is the
    difflib.SequenceMatcher similarity between that suite and |name|.
    When the control file lists no suites, [(None, 0)] is returned.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and returns a list of tuples
            of (suite name, ratio).
    """
    def _suite_similarities(t):
        scored = []
        for suite in t.suite_tag_parts:
            matcher = difflib.SequenceMatcher(a=suite, b=name)
            scored.append((suite, matcher.ratio()))
        # An empty list means no suites were listed; use the placeholder.
        return scored or [(None, 0)]

    return _suite_similarities
691
692
def name_in_tag_predicate(name):
    """Returns predicate that takes a control file and looks for |name|.

    This simply forwards to suite_common.name_in_tag_predicate; the
    predicate it builds takes a parsed control file (a ControlData)
    and returns True if the SUITE tag is present and contains |name|.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and looks for |name| in that
            ControlData object's suite member.
    """
    predicate = suite_common.name_in_tag_predicate(name)
    return predicate
704
705
def create_fs_getter(autotest_dir):
    """
    Build a control file getter that reads from the local file system.

    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # currently hard-coded places to look for tests.
    search_roots = []
    for relative_path in ('server/site_tests', 'client/site_tests',
                          'server/tests', 'client/tests'):
        search_roots.append(os.path.join(autotest_dir, relative_path))
    return control_file_getter.FileSystemGetter(search_roots)
716
717
def _create_ds_getter(build, devserver):
    """
    Build a control file getter backed by a devserver.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @return a DevServerGetter instance created for |build| and |devserver|.
    """
    ds_getter = control_file_getter.DevServerGetter(build, devserver)
    return ds_getter
725
726
727def _non_experimental_tests_predicate(test_data):
728    """Test predicate for non-experimental tests."""
729    return not test_data.experimental
730
731
def find_and_parse_tests(cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False, test_args=None):
    """
    Scan through all control files and return the eligible tests.

    The control files reachable through |cf_getter| are retrieved and parsed
    (optionally restricted to |suite_name|), then filtered with |predicate|.
    Unless |add_experimental| is set, |predicate| is additionally combined
    with a check that rejects experimental tests.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.
    @param test_args: A dict of args to be seeded in test control file.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute. Results are sorted based
            on the TIME setting in control file, slowest test comes first.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    parsed_tests = _ControlFileRetriever(
            cf_getter,
            forgiving_parser=forgiving_parser,
            run_prod_code=run_prod_code,
            test_args=test_args).retrieve_for_suite(suite_name)
    if add_experimental:
        effective_predicate = predicate
    else:
        # Experimental tests are excluded by AND-ing in an extra predicate.
        effective_predicate = _ComposedPredicate(
                [predicate, _non_experimental_tests_predicate])
    return suite_common.filter_tests(parsed_tests, effective_predicate)
778
779
def find_possible_tests(cf_getter, predicate, suite_name='', count=10):
    """
    Scan through all control files and suggest the closest matching names.

    Every control file reachable through |cf_getter| (optionally restricted
    to |suite_name|) is parsed and scored with |predicate|. The names with
    the highest similarity ratio are returned, at most |count| of them.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return a tuple of (name, ratio),
           or a list of such tuples, when run over a ControlData
           representation of a control file. `name` is the key to be
           compared, e.g., a suite name or test name. `ratio` is a value
           between [0,1] indicating the similarity of `name` and the value
           to be compared.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param count: Number of suggestions to return, default to 10.

    @return list of top names that are similar to the given test, sorted by
            match ratio, best match first.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    retriever = _ControlFileRetriever(cf_getter)
    tests = retriever.retrieve_for_suite(suite_name)
    logging.debug('Parsed %s control files.', len(tests))
    ratio_by_name = {}
    for control_data in tests.itervalues():
        scored = predicate(control_data)
        # Some predicates return a list of tuples (e.g.
        # name_in_tag_similarity_predicate), others a single tuple; treat
        # both uniformly as a list of (name, ratio) pairs. When a name is
        # seen more than once the last ratio wins.
        pairs = scored if isinstance(scored, list) else [scored]
        ratio_by_name.update(pairs)
    ranked = sorted(ratio_by_name.items(), key=operator.itemgetter(1),
                    reverse=True)
    return [name for name, _ in ranked[:count]]
817
818
819def _deprecated_suite_method(func):
820    """Decorator for deprecated Suite static methods.
821
822    TODO(ayatane): This is used to decorate functions that are called as
823    static methods on Suite.
824    """
825    @functools.wraps(func)
826    def wrapper(*args, **kwargs):
827        """Wraps |func| for warning."""
828        warnings.warn('Calling method "%s" from Suite is deprecated' %
829                      func.__name__)
830        return func(*args, **kwargs)
831    return staticmethod(wrapper)
832
833
834class _BaseSuite(object):
835    """
836    A suite of tests, defined by some predicate over control file variables.
837
    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.
841
842    @var _predicate: a function that should return True when run over a
843         ControlData representation of a control file that should be in
844         this Suite.
845    @var _tag: a string with which to tag jobs run in this suite.
846    @var _builds: the builds on which we're running this suite.
847    @var _afe: an instance of AFE as defined in server/frontend.py.
848    @var _tko: an instance of TKO as defined in server/frontend.py.
849    @var _jobs: currently scheduled jobs, if any.
850    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
851                         ControlData objects.
    @var _job_retry: a bool value indicating whether jobs should be retried
                     on failure.
854    @var _retry_handler: a RetryHandler object.
855
856    """
857
858
859    def __init__(
860            self,
861            tests,
862            tag,
863            builds,
864            board,
865            afe=None,
866            tko=None,
867            pool=None,
868            results_dir=None,
869            max_runtime_mins=24*60,
870            timeout_mins=24*60,
871            file_bugs=False,
872            suite_job_id=None,
873            ignore_deps=False,
874            extra_deps=None,
875            priority=priorities.Priority.DEFAULT,
876            wait_for_results=True,
877            job_retry=False,
878            max_retries=sys.maxint,
879            offload_failures_only=False,
880            test_source_build=None,
881            job_keyvals=None,
882            child_dependencies=(),
883            result_reporter=None,
884    ):
885        """Initialize instance.
886
887        @param tests: Iterable of tests to run.
888        @param tag: a string with which to tag jobs run in this suite.
889        @param builds: the builds on which we're running this suite.
890        @param board: the board on which we're running this suite.
891        @param afe: an instance of AFE as defined in server/frontend.py.
892        @param tko: an instance of TKO as defined in server/frontend.py.
893        @param pool: Specify the pool of machines to use for scheduling
894                purposes.
895        @param results_dir: The directory where the job can write results to.
896                            This must be set if you want job_id of sub-jobs
897                            list in the job keyvals.
898        @param max_runtime_mins: Maximum suite runtime, in minutes.
899        @param timeout: Maximum job lifetime, in hours.
900        @param suite_job_id: Job id that will act as parent id to all sub jobs.
901                             Default: None
902        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
903                            attribute and skip applying of dependency labels.
904                            (Default:False)
905        @param extra_deps: A list of strings which are the extra DEPENDENCIES
906                           to add to each test being scheduled.
907        @param priority: Integer priority level.  Higher is more important.
908        @param wait_for_results: Set to False to run the suite job without
909                                 waiting for test jobs to finish. Default is
910                                 True.
911        @param job_retry: A bool value indicating whether jobs should be retried
912                          on failure. If True, the field 'JOB_RETRIES' in
913                          control files will be respected. If False, do not
914                          retry.
915        @param max_retries: Maximum retry limit at suite level.
916                            Regardless how many times each individual test
917                            has been retried, the total number of retries
918                            happening in the suite can't exceed _max_retries.
919                            Default to sys.maxint.
920        @param offload_failures_only: Only enable gs_offloading for failed
921                                      jobs.
922        @param test_source_build: Build that contains the server-side test code.
923        @param job_keyvals: General job keyvals to be inserted into keyval file,
924                            which will be used by tko/parse later.
925        @param child_dependencies: (optional) list of dependency strings
926                to be added as dependencies to child jobs.
927        @param result_reporter: A _ResultReporter instance to report results. If
928                None, an _EmailReporter will be created.
929        """
930
931        self.tests = list(tests)
932        self._tag = tag
933        self._builds = builds
934        self._results_dir = results_dir
935        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
936                                                         delay_sec=10,
937                                                         debug=False)
938        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
939                                                         delay_sec=10,
940                                                         debug=False)
941        self._jobs = []
942        self._jobs_to_tests = {}
943
944        self._file_bugs = file_bugs
945        self._suite_job_id = suite_job_id
946        self._job_retry=job_retry
947        self._max_retries = max_retries
948        # RetryHandler to be initialized in schedule()
949        self._retry_handler = None
950        self.wait_for_results = wait_for_results
951        self._job_keyvals = job_keyvals
952        if result_reporter is None:
953            self._result_reporter = _EmailReporter(self)
954        else:
955            self._result_reporter = result_reporter
956
957        if extra_deps is None:
958            extra_deps = []
959        extra_deps.append(board)
960        if pool:
961            extra_deps.append(pool)
962        extra_deps.extend(child_dependencies)
963        self._dependencies = tuple(extra_deps)
964
965        self._job_creator = _SuiteChildJobCreator(
966            tag=tag,
967            builds=builds,
968            board=board,
969            afe=afe,
970            max_runtime_mins=max_runtime_mins,
971            timeout_mins=timeout_mins,
972            suite_job_id=suite_job_id,
973            ignore_deps=ignore_deps,
974            extra_deps=extra_deps,
975            priority=priority,
976            offload_failures_only=offload_failures_only,
977            test_source_build=test_source_build,
978            job_keyvals=job_keyvals,
979        )
980
981
982    def _schedule_test(self, record, test, retry_for=None):
983        """Schedule a single test and return the job.
984
985        Schedule a single test by creating a job, and then update relevant
986        data structures that are used to keep track of all running jobs.
987
988        Emits a TEST_NA status log entry if it failed to schedule the test due
989        to NoEligibleHostException or a non-existent board label.
990
991        Returns a frontend.Job object if the test is successfully scheduled.
992        If scheduling failed due to NoEligibleHostException or a non-existent
993        board label, returns None.
994
995        @param record: A callable to use for logging.
996                       prototype: record(base_job.status_log_entry)
997        @param test: ControlData for a test to run.
998        @param retry_for: If we are scheduling a test to retry an
999                          old job, the afe_job_id of the old job
1000                          will be passed in as |retry_for|.
1001
1002        @returns: A frontend.Job object or None
1003        """
1004        msg = 'Scheduling %s' % test.name
1005        if retry_for:
1006            msg = msg + ', to retry afe job %d' % retry_for
1007        logging.debug(msg)
1008        begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
1009        try:
1010            job = self._job_creator.create_job(test, retry_for=retry_for)
1011        except (error.NoEligibleHostException, proxy.ValidationError) as e:
1012            if (isinstance(e, error.NoEligibleHostException)
1013                or (isinstance(e, proxy.ValidationError)
1014                    and _is_nonexistent_board_error(e))):
1015                # Treat a dependency on a non-existent board label the same as
1016                # a dependency on a board that exists, but for which there's no
1017                # hardware.
1018                logging.debug('%s not applicable for this board/pool. '
1019                              'Emitting TEST_NA.', test.name)
1020                Status('TEST_NA', test.name,
1021                       'Skipping:  test not supported on this board/pool.',
1022                       begin_time_str=begin_time_str).record_all(record)
1023                return None
1024            else:
1025                raise e
1026        except (error.RPCException, proxy.JSONRPCException):
1027            if retry_for:
1028                # Mark that we've attempted to retry the old job.
1029                logging.debug("RPC exception occurred")
1030                self._retry_handler.set_attempted(job_id=retry_for)
1031            raise
1032        else:
1033            self._jobs.append(job)
1034            self._jobs_to_tests[job.id] = test
1035            if retry_for:
1036                # A retry job was just created, record it.
1037                self._retry_handler.add_retry(
1038                        old_job_id=retry_for, new_job_id=job.id)
1039                retry_count = (test.job_retries -
1040                               self._retry_handler.get_retry_max(job.id))
1041                logging.debug('Job %d created to retry job %d. '
1042                              'Have retried for %d time(s)',
1043                              job.id, retry_for, retry_count)
1044            self._remember_job_keyval(job)
1045            return job
1046
1047    def schedule(self, record):
1048        """
1049        Schedule jobs using |self._afe|.
1050
1051        frontend.Job objects representing each scheduled job will be put in
1052        |self._jobs|.
1053
1054        @param record: A callable to use for logging.
1055                       prototype: record(base_job.status_log_entry)
1056        @returns: The number of tests that were scheduled.
1057        """
1058        scheduled_test_names = []
1059        logging.debug('Discovered %d tests.', len(self.tests))
1060
1061        Status('INFO', 'Start %s' % self._tag).record_result(record)
1062        try:
1063            # Write job_keyvals into keyval file.
1064            if self._job_keyvals:
1065                utils.write_keyval(self._results_dir, self._job_keyvals)
1066
1067            # TODO(crbug.com/730885): This is a hack to protect tests that are
1068            # not usually retried from getting hit by a provision error when run
1069            # as part of a suite. Remove this hack once provision is separated
1070            # out in its own suite.
1071            self._bump_up_test_retries(self.tests)
1072            for test in self.tests:
1073                scheduled_job = self._schedule_test(record, test)
1074                if scheduled_job is not None:
1075                    scheduled_test_names.append(test.name)
1076
1077            # Write the num of scheduled tests and name of them to keyval file.
1078            logging.debug('Scheduled %d tests, writing the total to keyval.',
1079                          len(scheduled_test_names))
1080            utils.write_keyval(
1081                self._results_dir,
1082                self._make_scheduled_tests_keyvals(scheduled_test_names))
1083        except Exception:
1084            logging.exception('Exception while scheduling suite')
1085            Status('FAIL', self._tag,
1086                   'Exception while scheduling suite').record_result(record)
1087
1088        if self._job_retry:
1089            logging.debug("Initializing RetryHandler for suite %s.", self._tag)
1090            self._retry_handler = RetryHandler(
1091                    initial_jobs_to_tests=self._jobs_to_tests,
1092                    max_retries=self._max_retries)
1093            logging.debug("retry map created: %s ",
1094                          self._retry_handler._retry_map)
1095        else:
1096            logging.info("Will not retry jobs from suite %s.", self._tag)
1097        return len(scheduled_test_names)
1098
1099
1100    def _bump_up_test_retries(self, tests):
1101        """Bump up individual test retries to match suite retry options."""
1102        if not self._job_retry:
1103            return
1104
1105        for test in tests:
1106            # We do honor if a test insists on JOB_RETRIES = 0.
1107            if test.job_retries is None:
1108                logging.debug(
1109                        'Test %s did not request retries, but suite requires '
1110                        'retries. Bumping retries up to 1. '
1111                        '(See crbug.com/730885)',
1112                        test.name)
1113                test.job_retries = 1
1114
1115
1116    def _make_scheduled_tests_keyvals(self, scheduled_test_names):
1117        """Make a keyvals dict to write for scheduled test names.
1118
1119        @param scheduled_test_names: A list of scheduled test name strings.
1120
1121        @returns: A keyvals dict.
1122        """
1123        return {
1124            constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names),
1125            constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names),
1126        }
1127
1128
1129    def _should_report(self, result):
1130        """
1131        Returns True if this failure requires to be reported.
1132
1133        @param result: A result, encapsulating the status of the failed job.
1134        @return: True if we should report this failure.
1135        """
1136        return (self._file_bugs and result.test_executed and
1137                not result.is_testna() and
1138                result.is_worse_than(job_status.Status('GOOD', '', 'reason')))
1139
1140
1141    def _has_retry(self, result):
1142        """
1143        Return True if this result gets to retry.
1144
1145        @param result: A result, encapsulating the status of the failed job.
1146        @return: bool
1147        """
1148        return (self._job_retry
1149                and self._retry_handler.has_following_retry(result))
1150
1151
1152    def wait(self, record):
1153        """
1154        Polls for the job statuses, using |record| to print status when each
1155        completes.
1156
1157        @param record: callable that records job status.
1158                 prototype:
1159                   record(base_job.status_log_entry)
1160        """
1161        waiter = job_status.JobResultWaiter(self._afe, self._tko)
1162        try:
1163            if self._suite_job_id:
1164                jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
1165            else:
1166                logging.warning('Unknown suite_job_id, falling back to less '
1167                                'efficient results_generator.')
1168                jobs = self._jobs
1169            waiter.add_jobs(jobs)
1170            for result in waiter.wait_for_results():
1171                self._handle_result(result=result, record=record, waiter=waiter)
1172                if self._finished_waiting():
1173                    break
1174        except Exception:  # pylint: disable=W0703
1175            logging.exception('Exception waiting for results')
1176            Status('FAIL', self._tag,
1177                   'Exception waiting for results').record_result(record)
1178
1179
    def _finished_waiting(self):
        """Return whether the suite is finished waiting for child jobs.

        wait() calls this after handling each result and stops polling as
        soon as it returns True. This implementation always returns False,
        so wait() never stops early on its account.

        @return: bool, always False here.
        """
        return False
1183
1184
1185    def _handle_result(self, result, record, waiter):
1186        """
1187        Handle a test job result.
1188
1189        @param result: Status instance for job.
1190        @param record: callable that records job status.
1191                 prototype:
1192                   record(base_job.status_log_entry)
1193        @param waiter: JobResultsWaiter instance.
1194
1195        @instance_param _result_reporter: _ResultReporter instance.
1196        """
1197        self._record_result(result, record)
1198        rescheduled = False
1199        if self._job_retry and self._retry_handler._should_retry(result):
1200            rescheduled = self._retry_result(result, record, waiter)
1201        # TODO (crbug.com/751428): If the suite times out before a retry could
1202        # finish, we would lose the chance to report errors from the original
1203        # job.
1204        if self._has_retry(result) and rescheduled:
1205             return
1206
1207        if self._should_report(result):
1208            self._result_reporter.report(result)
1209
    def _record_result(self, result, record):
        """
        Record a test job result.

        Calls result.record_all(record) and then passes |result| to
        _remember_job_keyval so the job is also noted as a suite job keyval.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        result.record_all(record)
        self._remember_job_keyval(result)
1221
1222
1223    def _retry_result(self, result, record, waiter):
1224        """
1225        Retry a test job result.
1226
1227        @param result: Status instance for job.
1228        @param record: callable that records job status.
1229                 prototype:
1230                   record(base_job.status_log_entry)
1231        @param waiter: JobResultsWaiter instance.
1232        @returns: True if a job was scheduled for retry, False otherwise.
1233        """
1234        test = self._jobs_to_tests[result.id]
1235        try:
1236            # It only takes effect for CQ retriable job:
1237            #   1) in first try, test.fast=True.
1238            #   2) in second try, test will be run in normal mode, so reset
1239            #       test.fast=False.
1240            test.fast = False
1241            new_job = self._schedule_test(
1242                    record=record, test=test, retry_for=result.id)
1243        except (error.RPCException, proxy.JSONRPCException) as e:
1244            logging.error('Failed to schedule test: %s, Reason: %s',
1245                          test.name, e)
1246            return False
1247        else:
1248            waiter.add_job(new_job)
1249            return bool(new_job)
1250
1251    @property
1252    def jobs(self):
1253        """Give a copy of the associated jobs
1254
1255        @returns: array of jobs"""
1256        return [job for job in self._jobs]
1257
1258
    @property
    def _should_file_bugs(self):
        """Return whether bugs should be filed.

        @returns: bool, True when this suite's tag is listed in
                  _FILE_BUG_SUITES.
        """
        # File bug when failure is one of the _FILE_BUG_SUITES,
        # otherwise send an email to the owner and cc.
        return self._tag in _FILE_BUG_SUITES
1268
1269
1270    def abort(self):
1271        """
1272        Abort all scheduled test jobs.
1273        """
1274        if self._jobs:
1275            job_ids = [job.id for job in self._jobs]
1276            self._afe.run('abort_host_queue_entries', job__id__in=job_ids)
1277
1278
1279    def _remember_job_keyval(self, job):
1280        """
1281        Record provided job as a suite job keyval, for later referencing.
1282
1283        @param job: some representation of a job that has the attributes:
1284                    id, test_name, and owner
1285        """
1286        if self._results_dir and job.id and job.owner and job.test_name:
1287            job_id_owner = '%s-%s' % (job.id, job.owner)
1288            logging.debug('Adding job keyval for %s=%s',
1289                          job.test_name, job_id_owner)
1290            utils.write_keyval(
1291                self._results_dir,
1292                {hashlib.md5(job.test_name).hexdigest(): job_id_owner})
1293
1294
1295class Suite(_BaseSuite):
1296    """
1297    A suite of tests, defined by some predicate over control file variables.
1298
    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.
1302
1303    @var _predicate: a function that should return True when run over a
1304         ControlData representation of a control file that should be in
1305         this Suite.
1306    @var _tag: a string with which to tag jobs run in this suite.
1307    @var _builds: the builds on which we're running this suite.
1308    @var _afe: an instance of AFE as defined in server/frontend.py.
1309    @var _tko: an instance of TKO as defined in server/frontend.py.
1310    @var _jobs: currently scheduled jobs, if any.
1311    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
1312                         ControlData objects.
1313    @var _cf_getter: a control_file_getter.ControlFileGetter
1314    @var _retry: a bool value indicating whether jobs should be retried on
1315                 failure.
1316    @var _retry_handler: a RetryHandler object.
1317
1318    """
1319
    # TODO(ayatane): These methods are kept on the Suite class for
    # backward compatibility.
    #
    # Each assignment below exposes a module-level function (or a
    # suite_common function) as a static method of Suite by passing it
    # through _deprecated_suite_method, so calling it via Suite first emits
    # a deprecation warning and then delegates to the wrapped function.
    # Note that name_in_tag_predicate and get_test_source_build wrap the
    # suite_common implementations directly.
    find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests)
    find_possible_tests = _deprecated_suite_method(find_possible_tests)
    create_fs_getter = _deprecated_suite_method(create_fs_getter)
    name_in_tag_predicate = _deprecated_suite_method(
            suite_common.name_in_tag_predicate)
    name_in_tag_similarity_predicate = _deprecated_suite_method(
            name_in_tag_similarity_predicate)
    test_name_equals_predicate = _deprecated_suite_method(
            test_name_equals_predicate)
    test_name_matches_pattern_predicate = _deprecated_suite_method(
            test_name_matches_pattern_predicate)
    test_file_matches_pattern_predicate = _deprecated_suite_method(
            test_file_matches_pattern_predicate)
    matches_attribute_expression_predicate = _deprecated_suite_method(
            matches_attribute_expression_predicate)
    test_name_similarity_predicate = _deprecated_suite_method(
            test_name_similarity_predicate)
    test_file_similarity_predicate = _deprecated_suite_method(
            test_file_similarity_predicate)
    list_all_suites = _deprecated_suite_method(list_all_suites)
    get_test_source_build = _deprecated_suite_method(
            suite_common.get_test_source_build)
1344
1345
1346    @classmethod
1347    def create_from_predicates(cls, predicates, builds, board, devserver,
1348                               cf_getter=None, name='ad_hoc_suite',
1349                               run_prod_code=False, **dargs):
1350        """
1351        Create a Suite using a given predicate test filters.
1352
1353        Uses supplied predicate(s) to instantiate a Suite. Looks for tests in
1354        |autotest_dir| and will schedule them using |afe|.  Pulls control files
1355        from the default dev server. Results will be pulled from |tko| upon
1356        completion.
1357
1358        @param predicates: A list of callables that accept ControlData
1359                           representations of control files. A test will be
1360                           included in suite if all callables in this list
1361                           return True on the given control file.
1362        @param builds: the builds on which we're running this suite. It's a
1363                       dictionary of version_prefix:build.
1364        @param board: the board on which we're running this suite.
1365        @param devserver: the devserver which contains the build.
1366        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
1367                          using DevServerGetter.
1368        @param name: name of suite. Defaults to 'ad_hoc_suite'
1369        @param run_prod_code: If true, the suite will run the tests that
1370                              lives in prod aka the test code currently on the
1371                              lab servers.
1372        @param **dargs: Any other Suite constructor parameters, as described
1373                        in Suite.__init__ docstring.
1374        @return a Suite instance.
1375        """
1376        if cf_getter is None:
1377            if run_prod_code:
1378                cf_getter = create_fs_getter(_AUTOTEST_DIR)
1379            else:
1380                build = suite_common.get_test_source_build(builds, **dargs)
1381                cf_getter = _create_ds_getter(build, devserver)
1382
1383        return cls(predicates,
1384                   name, builds, board, cf_getter, run_prod_code, **dargs)
1385
1386
1387    @classmethod
1388    def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
1389                         **dargs):
1390        """
1391        Create a Suite using a predicate based on the SUITE control file var.
1392
1393        Makes a predicate based on |name| and uses it to instantiate a Suite
1394        that looks for tests in |autotest_dir| and will schedule them using
1395        |afe|.  Pulls control files from the default dev server.
1396        Results will be pulled from |tko| upon completion.
1397
1398        @param name: a value of the SUITE control file variable to search for.
1399        @param builds: the builds on which we're running this suite. It's a
1400                       dictionary of version_prefix:build.
1401        @param board: the board on which we're running this suite.
1402        @param devserver: the devserver which contains the build.
1403        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
1404                          using DevServerGetter.
1405        @param **dargs: Any other Suite constructor parameters, as described
1406                        in Suite.__init__ docstring.
1407        @return a Suite instance.
1408        """
1409        if cf_getter is None:
1410            build = suite_common.get_test_source_build(builds, **dargs)
1411            cf_getter = _create_ds_getter(build, devserver)
1412
1413        return cls([suite_common.name_in_tag_predicate(name)],
1414                   name, builds, board, cf_getter, **dargs)
1415
1416
1417    def __init__(
1418            self,
1419            predicates,
1420            tag,
1421            builds,
1422            board,
1423            cf_getter,
1424            run_prod_code=False,
1425            afe=None,
1426            tko=None,
1427            pool=None,
1428            results_dir=None,
1429            max_runtime_mins=24*60,
1430            timeout_mins=24*60,
1431            file_bugs=False,
1432            suite_job_id=None,
1433            ignore_deps=False,
1434            extra_deps=None,
1435            priority=priorities.Priority.DEFAULT,
1436            forgiving_parser=True,
1437            wait_for_results=True,
1438            job_retry=False,
1439            max_retries=sys.maxint,
1440            offload_failures_only=False,
1441            test_source_build=None,
1442            job_keyvals=None,
1443            test_args=None,
1444            child_dependencies=(),
1445            result_reporter=None,
1446    ):
1447        """
1448        Constructor
1449
1450        @param predicates: A list of callables that accept ControlData
1451                           representations of control files. A test will be
1452                           included in suite if all callables in this list
1453                           return True on the given control file.
1454        @param tag: a string with which to tag jobs run in this suite.
1455        @param builds: the builds on which we're running this suite.
1456        @param board: the board on which we're running this suite.
1457        @param cf_getter: a control_file_getter.ControlFileGetter
1458        @param afe: an instance of AFE as defined in server/frontend.py.
1459        @param tko: an instance of TKO as defined in server/frontend.py.
1460        @param pool: Specify the pool of machines to use for scheduling
1461                purposes.
1462        @param run_prod_code: If true, the suite will run the test code that
1463                              lives in prod aka the test code currently on the
1464                              lab servers.
1465        @param results_dir: The directory where the job can write results to.
1466                            This must be set if you want job_id of sub-jobs
1467                            list in the job keyvals.
1468        @param max_runtime_mins: Maximum suite runtime, in minutes.
1469        @param timeout: Maximum job lifetime, in hours.
1470        @param suite_job_id: Job id that will act as parent id to all sub jobs.
1471                             Default: None
1472        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
1473                            attribute and skip applying of dependency labels.
1474                            (Default:False)
1475        @param extra_deps: A list of strings which are the extra DEPENDENCIES
1476                           to add to each test being scheduled.
1477        @param priority: Integer priority level.  Higher is more important.
1478        @param wait_for_results: Set to False to run the suite job without
1479                                 waiting for test jobs to finish. Default is
1480                                 True.
1481        @param job_retry: A bool value indicating whether jobs should be retried
1482                          on failure. If True, the field 'JOB_RETRIES' in
1483                          control files will be respected. If False, do not
1484                          retry.
1485        @param max_retries: Maximum retry limit at suite level.
1486                            Regardless how many times each individual test
1487                            has been retried, the total number of retries
1488                            happening in the suite can't exceed _max_retries.
1489                            Default to sys.maxint.
1490        @param offload_failures_only: Only enable gs_offloading for failed
1491                                      jobs.
1492        @param test_source_build: Build that contains the server-side test code.
1493        @param job_keyvals: General job keyvals to be inserted into keyval file,
1494                            which will be used by tko/parse later.
1495        @param test_args: A dict of args passed all the way to each individual
1496                          test that will be actually ran.
1497        @param child_dependencies: (optional) list of dependency strings
1498                to be added as dependencies to child jobs.
1499        @param result_reporter: A _ResultReporter instance to report results. If
1500                None, an _EmailReporter will be created.
1501        """
1502        tests = find_and_parse_tests(
1503                cf_getter,
1504                _ComposedPredicate(predicates),
1505                tag,
1506                forgiving_parser=forgiving_parser,
1507                run_prod_code=run_prod_code,
1508                test_args=test_args,
1509        )
1510        super(Suite, self).__init__(
1511                tests=tests,
1512                tag=tag,
1513                builds=builds,
1514                board=board,
1515                afe=afe,
1516                tko=tko,
1517                pool=pool,
1518                results_dir=results_dir,
1519                max_runtime_mins=max_runtime_mins,
1520                timeout_mins=timeout_mins,
1521                file_bugs=file_bugs,
1522                suite_job_id=suite_job_id,
1523                ignore_deps=ignore_deps,
1524                extra_deps=extra_deps,
1525                priority=priority,
1526                wait_for_results=wait_for_results,
1527                job_retry=job_retry,
1528                max_retries=max_retries,
1529                offload_failures_only=offload_failures_only,
1530                test_source_build=test_source_build,
1531                job_keyvals=job_keyvals,
1532                child_dependencies=child_dependencies,
1533                result_reporter=result_reporter,
1534        )
1535
1536
class ProvisionSuite(_BaseSuite):
    """
    A suite for provisioning DUTs.

    This is done by creating dummy_Pass tests: the same dummy test is
    repeated once per available host (capped by num_max), and waiting is
    considered finished once num_required of them have passed.
    """


    def __init__(
            self,
            tag,
            builds,
            board,
            devserver,
            num_required,
            num_max=float('inf'),
            cf_getter=None,
            run_prod_code=False,
            test_args=None,
            test_source_build=None,
            **kwargs):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param num_required: number of tests that must pass.  This is
                             capped by the number of tests that are run.
        @param num_max: max number of tests to make.  By default there
                        is no cap, a test is created for each eligible host.
        @param cf_getter: a control_file_getter.ControlFileGetter.
        @param run_prod_code: Forwarded to _load_dummy_test when loading the
                              dummy test.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param test_source_build: Build that contains the server-side test code.
        @param kwargs: Various keyword arguments passed to
                       _BaseSuite constructor.
        """
        super(ProvisionSuite, self).__init__(
                tests=[], tag=tag, builds=builds, board=board, **kwargs)
        # Start from an empty suite so that the early return below leaves
        # nothing to run and nothing to wait for.
        self._num_successful = 0
        self._num_required = 0
        self.tests = []

        # Hosts are matched only on the dependencies that provisioning
        # itself does not act on.
        host_labels = [label for label in self._dependencies
                       if not provision.Provision.acts_on(label)]
        if 'pool:suites' in host_labels:
            logging.info('Provision suite is disabled on suites pool')
            return

        logging.debug('Looking for hosts matching %r', host_labels)
        matching_hosts = self._afe.get_hosts(
                invalid=False, multiple_labels=host_labels)
        logging.debug('Found %d matching hosts for ProvisionSuite',
                      len(matching_hosts))

        usable_hosts = [host for host in matching_hosts
                        if host.is_available()]
        logging.debug('Found %d available hosts for ProvisionSuite',
                      len(usable_hosts))

        dummy_test = _load_dummy_test(
                builds, devserver, cf_getter,
                run_prod_code, test_args, test_source_build)
        # One copy of the dummy test per available host, up to num_max.
        test_count = min(len(usable_hosts), num_max)
        self.tests = [dummy_test] * test_count
        logging.debug('Made %d tests for ProvisionSuite', len(self.tests))

        # Never require more passes than there are tests.
        self._num_required = min(num_required, len(self.tests))
        logging.debug('Expecting %d tests to pass for ProvisionSuite',
                      self._num_required)

    def _handle_result(self, result, record, waiter):
        """Run the parent's result handling, then count good results.

        @param result: result object; only its is_good() is consulted here.
        @param record: passed through to the parent implementation.
        @param waiter: passed through to the parent implementation.
        """
        super(ProvisionSuite, self)._handle_result(result, record, waiter)
        if not result.is_good():
            return
        self._num_successful += 1

    def _finished_waiting(self):
        """Return True once enough tests have passed.

        @returns: True if the number of good results seen so far has
                  reached the required number.
        """
        return self._num_required <= self._num_successful
1614
1615
def _load_dummy_test(
        builds,
        devserver,
        cf_getter=None,
        run_prod_code=False,
        test_args=None,
        test_source_build=None):
    """
    Load and return the dummy pass test.

    If no |cf_getter| is supplied, one is created here: a getter from
    create_fs_getter(_AUTOTEST_DIR) when |run_prod_code| is set, otherwise
    a getter from _create_ds_getter for the test source build, after the
    'control_files' artifact of that build has been staged on |devserver|.

    @param builds: the builds on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: a control_file_getter.ControlFileGetter.
    @param run_prod_code: If true and no cf_getter is given, control files
                          are read from _AUTOTEST_DIR instead of the
                          devserver. Also passed to the control file
                          retriever.
    @param test_args: A dict of args passed all the way to each individual
                      test that will be actually ran.
    @param test_source_build: Build that contains the server-side test code.
    @return the result of retrieving the 'dummy_Pass' test.
    """
    getter = cf_getter
    if getter is None and run_prod_code:
        getter = create_fs_getter(_AUTOTEST_DIR)
    elif getter is None:
        source_build = suite_common.get_test_source_build(
                builds, test_source_build=test_source_build)
        # Stage the control files before creating the devserver getter.
        devserver.stage_artifacts(image=source_build,
                                  artifacts=['control_files'])
        getter = _create_ds_getter(source_build, devserver)

    loader = _ControlFileRetriever(
            getter, run_prod_code=run_prod_code, test_args=test_args)
    return loader.retrieve_for_test('dummy_Pass')
1646
1647
class _ComposedPredicate(object):
    """Callable that is the conjunction of several predicates.

    Predicates are functions that take a test control data object and
    return True if that test is to be included.  An instance accepts a
    test only when every wrapped predicate accepts it, i.e. its set is
    the intersection of the input predicates' sets.  With no predicates
    at all, every test is accepted.
    """

    def __init__(self, predicates):
        """Initialize instance.

        @param predicates: Iterable of predicates; it is copied into a
                           list, so a one-shot iterable is fine.
        """
        self._predicates = list(predicates)

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self._predicates)

    def __call__(self, control_data_):
        # Evaluate in order and stop at the first predicate that rejects.
        for predicate in self._predicates:
            if not predicate(control_data_):
                return False
        return True
1672
1673
def _is_nonexistent_board_error(e):
    """Return True if error is caused by nonexistent board label.

    As of this writing, the error we are looking for has these traits:

     1) e.problem_keys is a dictionary
     2) 'meta_hosts' is the one and only key in that dictionary
     3) e.problem_keys['meta_hosts'] matches this pattern:
        "Label "board:.*" not found"

    Only 1) and 2) are checked, on the theory that they're relatively
    immutable.  Condition 3) is deliberately not checked: matching the
    message text seems likely to be a maintenance burden, and for the
    times when we're wrong, being right shouldn't matter enough
    (we _hope_).

    @param e: proxy.ValidationError instance
    @returns: boolean
    """
    problem_keys = e.problem_keys
    if not isinstance(problem_keys, dict):
        return False
    return len(problem_keys) == 1 and 'meta_hosts' in problem_keys
1698
1699
class _ResultReporter(object):
    """Abstract base class for reporting test results.

    Usually, this is used to report test failures.  Concrete reporters
    subclass this and implement report().
    """

    # Python 2 style metaclass declaration.  With ABCMeta as the metaclass,
    # a subclass can only be instantiated once it overrides every method
    # marked with @abc.abstractmethod.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def report(self, result):
        """Report test result.

        Must be overridden by subclasses; this base version does nothing.

        @param result: Status instance for job.
        """
1714
1715
class _EmailReporter(_ResultReporter):
    """Class that emails based on test failures."""

    # TODO(akeshet): Document what |bug_template| is actually supposed to come
    # from, and rename it to something unrelated to "bugs" which are no longer
    # relevant now that this is purely an email sender.
    def __init__(self, suite, bug_template=None):
        """Initialize instance.

        @param suite: suite object whose _tko, _job_creator, _tag and
                      _jobs_to_tests attributes are read when reporting.
        @param bug_template: template dictionary used as the suite-level
                             default; any falsy value is replaced by an
                             empty dict.
        """
        self._suite = suite
        self._bug_template = bug_template if bug_template else {}

    def _get_test_bug(self, result):
        """Get TestBug for the given result.

        @param result: Status instance for a test job.
        @returns: TestBug instance.
        """
        # The reporting modules depend on external packages (e.g. httplib2).
        # Importing them at module level would force everyone who imports
        # suite.py to build site-packages first, so the import is deferred
        # to the one place that needs it.
        from autotest_lib.server.cros.dynamic_suite import reporting

        suite = self._suite
        test_views = suite._tko.run('get_detailed_test_views',
                                    afe_job_id=result.id)
        cros_build = suite._job_creator.cros_build
        chrome_version = utils.get_chrome_version(test_views)
        return reporting.TestBug(cros_build, chrome_version, suite._tag,
                                 result)

    def _get_bug_template(self, result):
        """Get BugTemplate for test job.

        The reporter's own template is merged with the bug template of the
        test that ran as |result|'s job, when that test defines one.

        @param result: Status instance for job.
        @returns: the merged template; the reporter's own template if the
                  test has no bug template; or an empty dict if merging
                  fails with InvalidBugTemplateException.
        """
        # Deferred import: see _get_test_bug for why the reporting modules
        # are not imported at module level.
        from autotest_lib.server.cros.dynamic_suite import reporting_utils

        # Try to merge with bug template in test control file.
        suite_template = reporting_utils.BugTemplate(self._bug_template)
        try:
            control_data = self._suite._jobs_to_tests[result.id]
            merged = suite_template.finalize_bug_template(
                    control_data.bug_template)
        except AttributeError:
            # Test control file does not have bug template defined.
            return suite_template.bug_template
        except reporting_utils.InvalidBugTemplateException as e:
            logging.error('Merging bug templates failed with '
                          'error: %s An empty bug template will '
                          'be used.', e)
            return {}
        else:
            return merged

    def report(self, result):
        """Email a report for the given result.

        @param result: Status instance for job.
        """
        # Deferred import: the reporting modules depend on external
        # packages (e.g. httplib2), and importing them here keeps the rest
        # of this module usable without building site-packages.
        from autotest_lib.server.cros.dynamic_suite import reporting

        test_bug = self._get_test_bug(result)
        template = self._get_bug_template(result)
        reporting.send_email(test_bug, template)
1791