1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import glob
6import logging
7import os
8import re
9import time
10from distutils import version
11
12import common
13from autotest_lib.client.common_lib import autotemp
14from autotest_lib.client.common_lib import error
15from autotest_lib.client.common_lib import utils
16
17
class ManifestVersionsException(Exception):
    """Root of the exception hierarchy raised by this module."""
21
22
class QueryException(ManifestVersionsException):
    """Signals that a search for manifests could not be completed."""
26
27
class CloneException(ManifestVersionsException):
    """Signals that `git clone` was unable to create the repository."""
31
32
def _SystemOutput(command, timeout=None, args=()):
    """Run |command| via the shell and hand back what it wrote to stdout.

    Output on stderr is treated as expected, so it is logged at the
    default stdout log level rather than at ERROR level.  The command's
    exit status is not ignored (ignore_status=False is passed to
    utils.run).

    @param command: command string to execute.
    @param timeout: time limit in seconds before attempting to kill the
            running process. The call may take a few seconds longer than
            'timeout' to return if the process has to be killed.
    @param args: sequence of strings; each element is escaped, wrapped in
            " quotes and handed to the command as a separate argument.

    @return the command's stdout as a string, with trailing newlines
            removed.
    """
    result = utils.run(command, timeout=timeout, ignore_status=False,
                       stderr_is_expected=True, args=args)
    return result.stdout.rstrip('\n')
54
55
def _System(command, timeout=None):
    """Run |command|, treating any stderr output as expected.

    Both stdout and stderr of the command are tee'd to the logs, and the
    command's exit status is not ignored (ignore_status=False is passed
    to utils.run).  Nothing is returned.

    @param command: command string to execute.
    @param timeout: timeout in seconds.
    """
    run_options = dict(timeout=timeout,
                       ignore_status=False,
                       stdout_tee=utils.TEE_TO_LOGS,
                       stderr_tee=utils.TEE_TO_LOGS,
                       stderr_is_expected=True)
    utils.run(command, **run_options)
65
66
class ManifestVersions(object):
    """Class to allow discovery of manifests for new successful CrOS builds.

    Manifest versions is a repository that contains information on which
    builds passed/failed. This class is responsible for keeping a temp
    copy of the repository up to date.

    @var _CLONE_RETRY_SECONDS: Number of seconds to wait before retrying
                                a failed `git clone` operation.
    @var _CLONE_MAX_RETRIES: Maximum number of times to retry a failed
                             `git clone` operation.
    @var _MANIFEST_VERSIONS_URL: URL of the internal manifest-versions git repo.
    @var _ANY_MANIFEST_GLOB_PATTERN: pattern for shell glob for passed-build
                                     manifest directories of every build
                                     config.
    @var _BOARD_MANIFEST_GLOB_PATTERN: pattern for shell glob for passed-build
                                       manifests for a given board.
    @var _BOARD_MANIFEST_RE_PATTERN: pattern for regex that parses paths to
                                     manifests for a given board.
    @var _BOARD_BRANCH_MANIFEST_GLOB_PATTERN: relative path template, filled
                                     in with (board, build_type), of the
                                     directory holding one sub-directory per
                                     milestone.

    @var _git: absolute path of git binary.
    @var _tempdir: a scoped tempdir.  Will be destroyed on instance deletion.
    """

    # Five minutes between `git clone` attempts.
    _CLONE_RETRY_SECONDS = 5 * 60
    # One hour's worth of retries: 3600 / 300 == 12.
    _CLONE_MAX_RETRIES = 60 * 60 / _CLONE_RETRY_SECONDS
    _MANIFEST_VERSIONS_URL = ('https://chrome-internal-review.googlesource.com/'
                              'chromeos/manifest-versions.git')
    # All patterns below are relative to the root of the local clone.
    _ANY_MANIFEST_GLOB_PATTERN = 'build-name/*/pass/'
    # %s is the board name.
    _BOARD_MANIFEST_GLOB_PATTERN = 'build-name/%s-*/pass/'
    # %s is the board name; the three groups capture the build config type,
    # the milestone number and the manifest version.
    _BOARD_MANIFEST_RE_PATTERN = (r'build-name/%s-([^-]+)'
                                  r'(?:-group)?/pass/(\d+)/([0-9.]+)\.xml')
    # The two %s are the board name and the build type.
    _BOARD_BRANCH_MANIFEST_GLOB_PATTERN = 'build-name/%s-%s/pass/'
97
98
99    def __init__(self, tmp_repo_dir=None):
100        """Create a manifest versions manager.
101
102        @param tmp_repo_dir: For use in testing, if one does not wish to repeatedly
103            clone the manifest versions repo that is currently a few GB in size.
104        """
105        self._git = _SystemOutput('which git')
106        if tmp_repo_dir:
107            self._tempdir = autotemp.dummy_dir(tmp_repo_dir)
108        else:
109            self._tempdir = autotemp.tempdir(unique_id='_suite_scheduler')
110
111
112    def AnyManifestsSinceRev(self, revision):
113        """Determine if any builds passed since git |revision|.
114
115        @param revision: the git revision to look back to.
116        @return True if any builds have passed; False otherwise.
117        """
118        manifest_paths = self._ExpandGlobMinusPrefix(
119                self._tempdir.name, self._ANY_MANIFEST_GLOB_PATTERN)
120        if not manifest_paths:
121            logging.error('No paths to check for manifests???')
122            return False
123        logging.info('Checking if any manifests landed since %s', revision)
124        log_cmd = self._BuildCommand('log',
125                                     revision + '..HEAD',
126                                     '--pretty="format:%H"',
127                                     '--',
128                                     ' '.join(manifest_paths))
129        return _SystemOutput(log_cmd).strip() != ''
130
131
132    def Initialize(self):
133        """Set up internal state.  Must be called before other methods.
134
135        Clone manifest-versions.git into tempdir managed by this instance.
136        """
137        # If gerrit goes down during suite_scheduler operation,
138        # we'll enter a loop like the following:
139        #  1. suite_scheduler fails executing some `git` command.
140        #  2. The failure is logged at ERROR level, causing an
141        #     e-mail notification of the failure.
142        #  3. suite_scheduler terminates.
143        #  4. Upstart respawns suite_scheduler.
144        #  5. suite_scheduler comes here to restart with a new
145        #     manifest-versions repo.
146        #  6. `git clone` fails, and we go back to step 2.
147        #
148        # We want to rate limit the e-mail notifications, so we
149        # retry failed `git clone` operations for a time before we
150        # finally give up.
151        retry_count = 0
152        msg = None
153        while retry_count <= self._CLONE_MAX_RETRIES:
154            if retry_count:
155                time.sleep(self._CLONE_RETRY_SECONDS)
156            retry_count += 1
157            try:
158                logging.debug('Cloning manifest-versions.git,'
159                              ' attempt %d.', retry_count)
160                self._Clone()
161                logging.debug('manifest-versions.git cloned.')
162                return
163            except error.CmdError as e:
164                msg = str(e)
165                logging.debug('Clone failed: %s', msg)
166        raise CloneException('Failed to clone %s after %d attempts: %s' %
167                             (self._MANIFEST_VERSIONS_URL, retry_count, msg))
168
169
170    def ManifestsSinceDate(self, since_date, board):
171        """Return map of branch:manifests for |board| since |since_date|.
172
173        To fully specify a 'branch', one needs both the type and the numeric
174        milestone the branch was cut for, e.g. ('release', '19') or
175        ('factory', '17').
176
177        @param since_date: a datetime object, return all manifest files
178                           since |since_date|
179        @param board: the board whose manifests we want to check for.
180        @return {(branch_type, milestone): [manifests, oldest, to, newest]}
181        """
182        return self._GetManifests(
183            re.compile(self._BOARD_MANIFEST_RE_PATTERN % board),
184            self._QueryManifestsSinceDate(since_date, board))
185
186
187    def ManifestsSinceRev(self, rev, board):
188        """Return map of branch:manifests for |board| since git |rev|.
189
190        To fully specify a 'branch', one needs both the type and the numeric
191        milestone the branch was cut for, e.g. ('release', '19') or
192        ('factory', '17').
193
194        @param rev: return all manifest files from |rev| up to HEAD.
195        @param board: the board whose manifests we want to check for.
196        @return {(branch_type, milestone): [manifests, oldest, to, newest]}
197        """
198        return self._GetManifests(
199            re.compile(self._BOARD_MANIFEST_RE_PATTERN % board),
200            self._QueryManifestsSinceRev(rev, board))
201
202
203    def GetLatestManifest(self, board, build_type, milestone=None):
204        """Get the latest manifest of a given board and type.
205
206        @param board: the board whose manifests we want to check for.
207        @param build_type: Type of a build, e.g., release, factory or firmware.
208        @param milestone: Milestone to look for the latest manifest. Default to
209                          None, i.e., use the latest milestone.
210
211        @return: (milestone, manifest), e.g., (46, '7268.0.0')
212
213        """
214        milestones_folder = os.path.join(
215                self._tempdir.name,
216                self._BOARD_BRANCH_MANIFEST_GLOB_PATTERN % (board, build_type))
217        if not milestone:
218            try:
219                milestone_names = os.listdir(milestones_folder)
220            except OSError:
221                milestone_names = None
222            if not milestone_names:
223                raise QueryException('There is no milestone existed in %s.' %
224                                     milestones_folder)
225            milestone = max([m for m in milestone_names if m.isdigit()])
226        manifests_folder = os.path.join(milestones_folder, str(milestone))
227        manifests = [m.strip('.xml') for m in  os.listdir(manifests_folder)
228                     if m.endswith('.xml')]
229        if not manifests:
230            raise QueryException('There is no build existed in %s.' %
231                                 manifests_folder)
232        manifests.sort(key=version.LooseVersion)
233        return milestone, manifests[-1]
234
235
236    def _GetManifests(self, matcher, manifest_paths):
237        """Parse a list of manifest_paths into a map of branch:manifests.
238
239        Given a regexp |matcher| and a list of paths to manifest files,
240        parse the paths and build up a map of branches to manifests of
241        builds on those branches.
242
243        To fully specify a 'branch', one needs both the type and the numeric
244        milestone the branch was cut for, e.g. ('release', '19') or
245        ('factory', '17').
246
247        @param matcher: a compiled regexp that can be used to parse info
248                        out of the path to a manifest file.
249        @param manifest_paths: an iterable of paths to manifest files.
250        @return {(branch_type, milestone): [manifests, oldest, to, newest]}
251        """
252        branch_manifests = {}
253        for manifest_path in manifest_paths:
254            logging.debug('parsing manifest path %s', manifest_path)
255            match = matcher.match(manifest_path)
256            if not match:
257                logging.warning('Failed to parse path %s, regex: %s',
258                                manifest_path, matcher.pattern)
259                continue
260            groups = match.groups()
261            config_type, milestone, manifest = groups
262            branch = branch_manifests.setdefault((config_type, milestone), [])
263            branch.append(manifest)
264        for manifest_list in branch_manifests.itervalues():
265            manifest_list.sort(key=version.LooseVersion)
266        return branch_manifests
267
268
269    def GetCheckpoint(self):
270        """Return the latest checked-out git revision in manifest-versions.git.
271
272        @return the git hash of the latest git revision.
273        """
274        return _SystemOutput(self._BuildCommand('log',
275                                                '--pretty="format:%H"',
276                                                '--max-count=1')).strip()
277
278
279    def Update(self):
280        """Get latest manifest information."""
281        return _System(self._BuildCommand('pull'))
282
283
284    def _BuildCommand(self, command, *args):
285        """Build a git CLI |command|, passing space-delineated |args|.
286
287        @param command: the git sub-command to use.
288        @param args: args for the git sub-command.  Will be space-delineated.
289        @return a string with the above formatted into it.
290        """
291        return '%s --git-dir=%s --work-tree=%s %s %s' % (
292            self._git, os.path.join(self._tempdir.name, '.git'),
293            self._tempdir.name, command, ' '.join(args))
294
295
296    def _Clone(self):
297        """Clone self._MANIFEST_VERSIONS_URL into a local temp dir."""
298        # Can't use --depth here because the internal gerrit server doesn't
299        # support it.  Wish we could.  http://crosbug.com/29047
300        # Also, note that --work-tree and --git-dir interact oddly with
301        # 'git clone', so we don't use them.
302        _System('%s clone %s %s' % (self._git,
303                                    self._MANIFEST_VERSIONS_URL,
304                                    self._tempdir.name))
305
306
307    def _ShowCmd(self):
308        """Return a git command that shows file names added by commits."""
309        return self._BuildCommand('show',
310                                  '--pretty="format:"',
311                                  '--name-only',
312                                  '--diff-filter=A')
313
314
315    def _QueryManifestsSinceRev(self, git_rev, board):
316        """Get manifest filenames for |board|, since |git_rev|.
317
318        @param git_rev: check for manifests newer than this git commit.
319        @param board: the board whose manifests we want to check for.
320        @return whitespace-delineated
321        @raise QueryException if errors occur.
322        """
323        return self._QueryManifestsSince(git_rev + '..HEAD', board)
324
325
326    def _QueryManifestsSinceDate(self, since_date, board):
327        """Return list of manifest files for |board| since |since_date|.
328
329        @param sync_date: a datetime object, return all manifest files
330                          since |since_date|.
331        @param board: the board whose manifests we want to check for.
332        @raise QueryException if errors occur.
333        """
334        return self._QueryManifestsSince('--since="%s"' % since_date, board)
335
336
337    def _ExpandGlobMinusPrefix(self, prefix, path_glob):
338        """Expand |path_glob| under dir |prefix|, then remove |prefix|.
339
340        Path-concatenate prefix and path_glob, then expand the resulting glob.
341        Take the results and remove |prefix| (and path separator) from each.
342        Return the resulting list.
343
344        Assuming /tmp/foo/baz and /tmp/bar/baz both exist,
345        _ExpandGlobMinusPrefix('/tmp', '*/baz')  # ['bar/baz', 'foo/baz']
346
347        @param prefix: a path under which to expand |path_glob|.
348        @param path_glob: the glob to expand.
349        @return a list of paths relative to |prefix|, based on |path_glob|.
350        """
351        full_glob = os.path.join(prefix, path_glob)
352        return [p[len(prefix)+1:] for p in glob.iglob(full_glob)]
353
354
355    def _QueryManifestsSince(self, since_spec, board):
356        """Return list of manifest files for |board|, since |since_spec|.
357
358        @param since_spec: a formatted arg to git log that specifies a starting
359                           point to list commits from, e.g.
360                             '--since="2 days ago"' or 'd34db33f..'
361        @param board: the board whose manifests we want to check for.
362        @raise QueryException if git log or git show errors occur.
363        """
364        manifest_paths = self._ExpandGlobMinusPrefix(
365            self._tempdir.name, self._BOARD_MANIFEST_GLOB_PATTERN % board)
366        log_cmd = self._BuildCommand('log',
367                                     since_spec,
368                                     '--pretty="format:%H"',
369                                     '--',
370                                     ' '.join(manifest_paths))
371        try:
372            # If we pass nothing to git show, we get unexpected results.
373            # So, return early if git log is going to give us nothing.
374            if not manifest_paths or not _SystemOutput(log_cmd):
375                return []
376            manifests = _SystemOutput('%s|xargs %s' % (log_cmd,
377                                                       self._ShowCmd()))
378        except (IOError, OSError) as e:
379            raise QueryException(e)
380        logging.debug('found %s', manifests)
381        return [m for m in re.split('\s+', manifests) if m]
382