1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import glob
6import httplib
7import logging
8import multiprocessing
9import os
10import re
11import urlparse
12import urllib2
13
14from autotest_lib.client.bin import utils
15from autotest_lib.client.common_lib import error, global_config
16from autotest_lib.client.common_lib.cros import dev_server
17from autotest_lib.client.common_lib.cros.graphite import autotest_stats
18
19
# Local stateful update path is relative to the CrOS source directory.
LOCAL_STATEFUL_UPDATE_PATH = 'src/platform/dev/stateful_update'
# Path of the stateful_update script inside the Chromium OS chroot; used as a
# fallback when the source-tree copy above does not exist.
LOCAL_CHROOT_STATEFUL_UPDATE_PATH = '/usr/bin/stateful_update'
# Update engine states, compared against the value parsed from the CURRENT_OP
# line of `update_engine_client -status`.
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# A list of update engine client states that occur after an update is triggered.
UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FORUPDATE',
                             'UPDATE_STATUS_UPDATE_AVAILABLE',
                             'UPDATE_STATUS_DOWNLOADING',
                             'UPDATE_STATUS_FINALIZING']
30
class ChromiumOSError(error.InstallError):
    """Generic error for ChromiumOS-specific exceptions.

    Also serves as the base class of the more specific RootFSUpdateError and
    StatefulUpdateError.
    """
33
34
class BrilloError(error.InstallError):
    """Generic error for Brillo-specific exceptions.

    Sibling of ChromiumOSError: both derive directly from error.InstallError.
    """
37
38
class RootFSUpdateError(ChromiumOSError):
    """Raised when the RootFS fails to update.

    This covers failures to trigger, install, verify or roll back a rootfs
    update.
    """
41
42
class StatefulUpdateError(ChromiumOSError):
    """Raised when the stateful partition fails to update.

    This happens when the stateful_update script exits with an error on the
    host.
    """
45
46
def url_to_version(update_url):
    """Extract the version component from update_url.

    The Chrome OS version is normally the last element of the URL path. Delta
    update URLs are the exception, because they are rooted under the version,
    e.g. http://.../update/.../0.14.755.0/au/0.14.754.0. For those, everything
    from the first '/au/' onward is dropped before the last element is read.

    @param update_url: url to the image to update to.
    @returns the last path element (after removing any au section), with
        surrounding whitespace stripped.

    """
    url_path = urlparse.urlparse(update_url).path
    # Drop the delta-update suffix so the version becomes the last element.
    path_without_au = re.sub('/au/.*', '', url_path)
    last_element = path_without_au.rpartition('/')[2]
    return last_element.strip()
59
60
def url_to_image_name(update_url):
    """Build the image name from the last two path elements of update_url.

    For example, the URL
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    yields lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    path_elements = urlparse.urlparse(update_url).path.split('/')
    image_name_elements = path_elements[-2:]
    return '/'.join(image_name_elements)
73
74
def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    The 'image_url_pattern' value from the CROS section of the global config
    is turned into a regular expression by substituting a capture group for
    each '%s' placeholder, and that expression is searched for in update_url.

    @param update_url: The url for update.
        Eg: http://devserver:port/update/build.

    @return: A tuple of the captured groups, expected to be
        (devserver url, build).

    @raises ValueError: If the update_url doesn't match the expected pattern.
    @raises ValueError: If no global_config was found, or it doesn't contain an
        image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                'an image_url_pattern.')
    # Raw string: '\S' is a regex class, not a string escape.
    # NOTE(review): the literal parts of the configured pattern are not
    # regex-escaped, so e.g. a '.' in it matches any character.
    re_pattern = pattern.replace('%s', r'(\S+)')
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return tuple(parts.groups())
98
99
def list_image_dir_contents(update_url):
    """Asks the devserver to list the image directory behind update_url.

    This is a best-effort diagnostic: an unparsable update_url or a devserver
    failure is logged as a warning instead of being raised.

    @param update_url: An update url. Eg: http://devserver:port/update/build.
    """
    if not update_url:
        logging.warning('Need update_url to list contents of the devserver.')
        return

    failure_prefix = ('Cannot check contents of devserver, update url %s' %
                      update_url)
    try:
        # Unpacking stays inside the try so that an unexpected number of
        # captured groups is reported the same way as a non-matching url.
        server_url, build_name = _get_devserver_build_from_update_url(
                update_url)
    except ValueError as parse_error:
        logging.warning('%s: %s', failure_prefix, parse_error)
        return

    image_server = dev_server.ImageServer(server_url)
    try:
        image_server.list_image_dir(build_name)
    # The devserver will retry on URLError to avoid flaky connections, but will
    # eventually raise the URLError if it persists. All HTTPErrors get
    # converted to DevServerExceptions.
    except (dev_server.DevServerException, urllib2.URLError) as list_error:
        logging.warning('%s: %s', failure_prefix, list_error)
122
123
124# TODO(garnold) This implements shared updater functionality needed for
125# supporting the autoupdate_EndToEnd server-side test. We should probably
126# migrate more of the existing ChromiumOSUpdater functionality to it as we
127# expand non-CrOS support in other tests.
class BaseUpdater(object):
    """Platform-agnostic DUT update functionality.

    All commands are executed through host.run(). Errors hit while installing
    an image are raised and also recorded in self._update_error_queue.
    """

    def __init__(self, updater_ctrl_bin, update_url, host):
        """Initializes the object.

        @param updater_ctrl_bin: Path to update_engine_client.
        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        """
        self.updater_ctrl_bin = updater_ctrl_bin
        self.update_url = update_url
        self.host = host
        # Holds exceptions recorded by failed update steps.
        # NOTE(review): the queue is bounded to 2 entries and put() blocks
        # when it is full, so errors must be drained between repeated
        # failing updates on the same object.
        self._update_error_queue = multiprocessing.Queue(2)


    def check_update_status(self):
        """Returns the current update engine state.

        We use the `update_engine_client -status' command and parse the line
        indicating the update state, e.g. "CURRENT_OP=UPDATE_STATUS_IDLE".

        @returns the text following the last '=' of the CURRENT_OP line.
        """
        update_status = self.host.run(
            '%s -status 2>&1 | grep CURRENT_OP' % self.updater_ctrl_bin)
        return update_status.stdout.strip().split('=')[-1]


    def trigger_update(self):
        """Triggers a background update.

        @raise RootFSUpdateError if anything went wrong.
        """
        autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
                          (self.updater_ctrl_bin, self.update_url))
        err_msg = 'Failed to trigger an update on %s.' % self.host.hostname
        logging.info('Triggering update via: %s', autoupdate_cmd)
        try:
            self.host.run(autoupdate_cmd)
        except (error.AutoservSshPermissionDeniedError,
                error.AutoservSSHTimeout) as e:
            err_msg += ' SSH reports an error: %s' % type(e).__name__
            raise RootFSUpdateError(err_msg)
        except error.AutoservRunError as e:
            # Check if the exit code is 255, if so it's probably a generic
            # SSH error.
            result = e.args[1]
            if result.exit_status == 255:
                err_msg += (' SSH reports a generic error (255), which could '
                            'indicate a problem with underlying connectivity '
                            'layers.')
                raise RootFSUpdateError(err_msg)

            # We have ruled out all SSH cases, the error code is from
            # update_engine_client, though we still don't know why.
            list_image_dir_contents(self.update_url)
            err_msg += (' It could be that the devserver is unreachable, the '
                        'payload unavailable, or there is a bug in the update '
                        'engine (unlikely). Reported error: %s' %
                        type(e).__name__)
            raise RootFSUpdateError(err_msg)


    def _verify_update_completed(self):
        """Verifies that an update has completed.

        @raise RootFSUpdateError: if verification fails.
        """
        status = self.check_update_status()
        if status != UPDATER_NEED_REBOOT:
            raise RootFSUpdateError('Update did not complete with correct '
                                    'status. Expecting %s, actual %s' %
                                    (UPDATER_NEED_REBOOT, status))


    def update_image(self):
        """Updates the device image and verifies success.

        Every failure is recorded in self._update_error_queue before it is
        raised.

        @raise RootFSUpdateError: if the update command fails or the update
            engine does not end up waiting for a reboot.
        """
        try:
            autoupdate_cmd = ('%s --update --omaha_url=%s 2>&1' %
                              (self.updater_ctrl_bin, self.update_url))
            self.host.run(autoupdate_cmd, timeout=3600)
        except error.AutoservRunError as e:
            list_image_dir_contents(self.update_url)
            update_error = RootFSUpdateError(
                    'Failed to install device image using payload at %s '
                    'on %s: %s' %
                    (self.update_url, self.host.hostname, e))
            self._update_error_queue.put(update_error)
            raise update_error
        except Exception as e:
            # Record unexpected errors too. A bare raise is used so the
            # original traceback is preserved.
            self._update_error_queue.put(e)
            raise

        try:
            self._verify_update_completed()
        except RootFSUpdateError as e:
            self._update_error_queue.put(e)
            raise
226
227
class ChromiumOSUpdater(BaseUpdater):
    """Helper class used to update DUT with image of desired version."""
    # Stateful update script on the DUT itself; used when no local copy of
    # the script can be found. (The attribute name is misspelled but kept
    # because it is part of the class interface.)
    REMOTE_STATEUL_UPDATE_PATH = '/usr/local/bin/stateful_update'
    UPDATER_BIN = '/usr/bin/update_engine_client'
    # Where a local copy of the stateful update script is sent on the DUT.
    STATEFUL_UPDATE = '/tmp/stateful_update'
    UPDATED_MARKER = '/var/run/update_engine_autoupdate_completed'
    # Logs collected from the DUT when run_update() fails.
    UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']

    # Partition numbers of the kernel and root partition of each slot.
    KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
    KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
    # Time to wait for new kernel to be marked successful after
    # auto update.
    KERNEL_UPDATE_TIMEOUT = 120

    _timer = autotest_stats.Timer('cros_autoupdater')

    def __init__(self, update_url, host=None, local_devserver=False):
        """Initializes the object.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        @param local_devserver: If True, the expected version is not derived
            from update_url and self.update_version is None.
        """
        super(ChromiumOSUpdater, self).__init__(self.UPDATER_BIN, update_url,
                                                host)
        self.local_devserver = local_devserver
        if local_devserver:
            self.update_version = None
        else:
            self.update_version = url_to_version(update_url)


    def reset_update_engine(self):
        """Resets the host to prepare for a clean update regardless of state.

        @raise ChromiumOSError: if the update engine is not idle after being
            restarted.
        """
        self._run('rm -f %s' % self.UPDATED_MARKER)
        self._run('stop ui || true')
        self._run('stop update-engine || true')
        self._run('start update-engine')

        if self.check_update_status() != UPDATER_IDLE:
            raise ChromiumOSError('%s is not in an installable state' %
                                  self.host.hostname)


    def _run(self, cmd, *args, **kwargs):
        """Abbreviated form of self.host.run(...)"""
        return self.host.run(cmd, *args, **kwargs)


    def rootdev(self, options=''):
        """Returns the stripped output of rootdev <options>.

        @param options: options to run rootdev.

        """
        return self._run('rootdev %s' % options).stdout.strip()


    def get_kernel_state(self):
        """Returns the (<active>, <inactive>) kernel state as a pair.

        The active root partition number is read from the trailing digits of
        the `rootdev -s` output.

        @raise ChromiumOSError: if the active root partition belongs to
            neither KERNEL_A nor KERNEL_B.
        """
        # Raw string: '\d' and '\Z' are regex escapes, not string escapes.
        active_root = int(re.findall(r'\d+\Z', self.rootdev('-s'))[0])
        if active_root == self.KERNEL_A['root']:
            return self.KERNEL_A, self.KERNEL_B
        if active_root == self.KERNEL_B['root']:
            return self.KERNEL_B, self.KERNEL_A
        raise ChromiumOSError('Encountered unknown root partition: %s' %
                              active_root)


    def _cgpt(self, flag, kernel, dev='$(rootdev -s -d)'):
        """Return numeric cgpt value for the specified flag, kernel, device.

        @param flag: cgpt show option selecting the value, e.g. '-P'.
        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
        @param dev: device expression passed to cgpt; the default is expanded
            by the shell on the host.
        """
        cgpt_cmd = 'cgpt show -n -i %d %s %s' % (kernel['kernel'], flag, dev)
        return int(self._run(cgpt_cmd).stdout.strip())


    def get_kernel_priority(self, kernel):
        """Return numeric priority for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-P', kernel)


    def get_kernel_success(self, kernel):
        """Return boolean success flag for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-S', kernel) != 0


    def get_kernel_tries(self, kernel):
        """Return tries count for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-T', kernel)


    def get_stateful_update_script(self):
        """Returns the path to the stateful update script on the target."""
        # We attempt to load the local stateful update path in 3 different
        # ways. First we use the location specified in the autotest global
        # config. If this doesn't exist, we attempt to use the Chromium OS
        # Chroot path to the installed script. If all else fails, we use the
        # stateful update script on the host.
        stateful_update_path = os.path.join(
                global_config.global_config.get_config_value(
                        'CROS', 'source_tree', default=''),
                LOCAL_STATEFUL_UPDATE_PATH)

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not find Chrome OS source location for '
                            'stateful_update script at %s, falling back to '
                            'chroot copy.', stateful_update_path)
            stateful_update_path = LOCAL_CHROOT_STATEFUL_UPDATE_PATH

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not find chroot stateful_update script, '
                            'falling back on client copy.')
            return self.REMOTE_STATEUL_UPDATE_PATH

        # A local copy exists: send it to the host and use that copy.
        self.host.send_file(
                stateful_update_path, self.STATEFUL_UPDATE,
                delete_dest=True)
        return self.STATEFUL_UPDATE


    def reset_stateful_partition(self):
        """Clear any pending stateful update request."""
        statefuldev_cmd = [self.get_stateful_update_script(),
                           '--stateful_change=reset', '2>&1']
        self._run(' '.join(statefuldev_cmd))


    def revert_boot_partition(self):
        """Revert the boot partition.

        Runs /postinst on the host with the current `rootdev -s` partition.

        @returns the result of the /postinst run.
        """
        part = self.rootdev('-s')
        logging.warning('Reverting update; Boot partition will be %s', part)
        return self._run('/postinst %s 2>&1' % part)


    def rollback_rootfs(self, powerwash):
        """Triggers rollback and waits for it to complete.

        @param powerwash: If true, powerwash as part of rollback.

        @raise RootFSUpdateError if anything went wrong.

        """
        version = self.host.get_release_version()
        # can_rollback was introduced in M36 (build 5772). The release
        # version has the form X.Y.Z; only the first part is needed here.
        try:
            build_number = int(version.split('.')[0])
        except ValueError:
            logging.error('Could not parse build number.')
            build_number = 0

        if build_number >= 5772:
            can_rollback_cmd = '%s --can_rollback' % self.UPDATER_BIN
            logging.info('Checking for rollback.')
            try:
                self._run(can_rollback_cmd)
            except error.AutoservRunError as e:
                raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
                                        (self.host.hostname, str(e)))

        rollback_cmd = '%s --rollback --follow' % self.UPDATER_BIN
        if not powerwash:
            rollback_cmd += ' --nopowerwash'

        logging.info('Performing rollback.')
        try:
            self._run(rollback_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError('Rollback failed on %s: %s' %
                                    (self.host.hostname, str(e)))

        self._verify_update_completed()


    # TODO(garnold) This is here for backward compatibility and should be
    # deprecated once we shift to using update_image() everywhere.
    @_timer.decorate
    def update_rootfs(self):
        """Run the standard command to force an update."""
        return self.update_image()


    @_timer.decorate
    def update_stateful(self, clobber=True):
        """Updates the stateful partition.

        Every failure is recorded in self._update_error_queue before it is
        raised.

        @param clobber: If True, a clean stateful installation.

        @raise StatefulUpdateError: if the stateful update command fails.
        """
        logging.info('Updating stateful partition...')
        # The stateful payload is served from the 'static' counterpart of
        # the update URL. Only the first 'update' in the URL path is
        # replaced, so a hostname or build name that happens to contain
        # 'update' is left intact.
        parsed_url = urlparse.urlparse(self.update_url)
        statefuldev_url = urlparse.urlunparse(parsed_url._replace(
                path=parsed_url.path.replace('update', 'static', 1)))

        # Attempt stateful partition update; this must succeed so that the newly
        # installed host is testable after update.
        statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
        if clobber:
            statefuldev_cmd.append('--stateful_change=clean')

        statefuldev_cmd.append('2>&1')
        try:
            self._run(' '.join(statefuldev_cmd), timeout=1200)
        except error.AutoservRunError:
            update_error = StatefulUpdateError(
                    'Failed to perform stateful update on %s' %
                    self.host.hostname)
            self._update_error_queue.put(update_error)
            raise update_error
        except Exception as e:
            # Record unexpected errors too. A bare raise is used so the
            # original traceback is preserved.
            self._update_error_queue.put(e)
            raise


    @_timer.decorate
    def run_update(self, update_root=True):
        """Update the DUT with image of specific version.

        The rootfs and stateful updates run in parallel in separate
        processes. If either records an error, the boot partition is
        reverted, the stateful partition is reset and the error is raised.

        @param update_root: True to force a rootfs update.

        @raise ChromiumOSError: if the update server cannot be reached or the
            update engine cannot be reset to an idle state.
        """
        booted_version = self.host.get_release_version()
        if self.update_version:
            logging.info('Updating from version %s to %s.',
                         booted_version, self.update_version)

        # Check that Dev Server is accepting connections (from autoserv's host).
        # If we can't talk to it, the machine host probably can't either.
        auserver_host = urlparse.urlparse(self.update_url)[1]
        connection = httplib.HTTPConnection(auserver_host)
        try:
            connection.connect()
        except IOError:
            raise ChromiumOSError(
                'Update server at %s not available' % auserver_host)
        finally:
            # The connection is only a reachability probe; don't leak it.
            connection.close()

        logging.info('Installing from %s to %s', self.update_url,
                     self.host.hostname)

        # Reset update state.
        self.reset_update_engine()
        self.reset_stateful_partition()

        try:
            updaters = []
            if update_root:
                updaters.append(
                    multiprocessing.process.Process(target=self.update_rootfs))
            else:
                logging.info('Root update is skipped.')
            updaters.append(
                multiprocessing.process.Process(target=self.update_stateful))

            # Run the updaters in parallel.
            for updater in updaters:
                updater.start()
            for updater in updaters:
                updater.join()

            # Re-raise the first error that occurred.
            if not self._update_error_queue.empty():
                update_error = self._update_error_queue.get()
                self.revert_boot_partition()
                self.reset_stateful_partition()
                raise update_error

            logging.info('Update complete.')
        except:
            # Collect update engine logs in the event of failure.
            if self.host.job:
                logging.info('Collecting update engine logs...')
                self.host.get_file(
                        self.UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
                        preserve_perm=False)
            list_image_dir_contents(self.update_url)
            raise
        finally:
            self.host.show_update_engine_log()


    def check_version(self):
        """Check the image running in DUT has the desired version.

        @returns: True if the DUT's image version matches the version that
            the autoupdater tries to update to, False otherwise (including
            when no update version is known).

        """
        booted_version = self.host.get_release_version()
        return bool(self.update_version and
                    self.update_version.endswith(booted_version))


    def check_version_to_confirm_install(self):
        """Check image running in DUT has the desired version to be installed.

        The method should not be used to check if DUT needs to have a full
        reimage. Only use it to confirm a image is installed.

        The method is designed to verify version for following 6 scenarios with
        samples of version to update to and expected booted version:
        1. trybot paladin build.
        update version: trybot-lumpy-paladin/R27-3837.0.0-b123
        booted version: 3837.0.2013_03_21_1340

        2. trybot release build.
        update version: trybot-lumpy-release/R27-3837.0.0-b456
        booted version: 3837.0.0

        3. buildbot official release build.
        update version: lumpy-release/R27-3837.0.0
        booted version: 3837.0.0

        4. non-official paladin rc build.
        update version: lumpy-paladin/R27-3878.0.0-rc7
        booted version: 3837.0.0-rc7

        5. chrome-perf build.
        update version: lumpy-chrome-perf/R28-3837.0.0-b2996
        booted version: 3837.0.0

        6. pgo-generate build.
        update version: lumpy-release-pgo-generate/R28-3837.0.0-b2996
        booted version: 3837.0.0-pgo-generate

        When we are checking if a DUT needs to do a full install, we should NOT
        use this method to check if the DUT is running the same version, since
        it may return false positive for a DUT running trybot paladin build to
        be updated to another trybot paladin build.

        TODO: This logic has a bug if a trybot paladin build failed to be
        installed in a DUT running an older trybot paladin build with same
        platform number, but different build number (-b###). So to conclusively
        determine if a tryjob paladin build is imaged successfully, we may need
        to find out the date string from update url.

        @returns: True if the DUT's image version (without the date string if
            the image is a trybot build), matches the version that the
            autoupdater is trying to update to.

        """
        # In the local_devserver case, we can't know the expected
        # build, so just pass.
        if not self.update_version:
            return True

        # Always try the default check_version method first, this prevents
        # any backward compatibility issue.
        if self.check_version():
            return True

        return utils.version_match(self.update_version,
                                   self.host.get_release_version(),
                                   self.update_url)


    def verify_boot_expectations(self, expected_kernel_state, rollback_message):
        """Verifies that we fully booted given expected kernel state.

        This method both verifies that we booted using the correct kernel
        state and that the OS has marked the kernel as good.

        @param expected_kernel_state: kernel state that we are verifying with
            i.e. I expect to be booted onto partition 4 etc. See output of
            get_kernel_state.
        @param rollback_message: string to raise as a ChromiumOSError
            if we booted with the wrong partition.

        @raises ChromiumOSError: If we didn't.
        """
        # Figure out the newly active kernel.
        active_kernel_state = self.get_kernel_state()[0]

        # Check for rollback due to a bad build.
        if (expected_kernel_state and
                active_kernel_state != expected_kernel_state):

            # Kernel crash reports should be wiped between test runs, but
            # may persist from earlier parts of the test, or from problems
            # with provisioning.
            #
            # Kernel crash reports will NOT be present if the crash happened
            # before encrypted stateful is mounted.
            #
            # TODO(dgarrett): Integrate with server/crashcollect.py at some
            # point.
            #
            # NOTE(review): glob runs on the machine executing this code,
            # not through self.host -- confirm that is intended.
            kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
            if kernel_crashes:
                rollback_message += ': kernel_crash'
                logging.debug('Found %d kernel crash reports:',
                              len(kernel_crashes))
                # The crash names contain timestamps that may be useful:
                #   kernel.20131207.005945.0.kcrash
                for crash in kernel_crashes:
                    logging.debug('  %s', os.path.basename(crash))

            # Print out some information to make it easier to debug
            # the rollback.
            logging.debug('Dumping partition table.')
            self._run('cgpt show $(rootdev -s -d)')
            logging.debug('Dumping crossystem for firmware debugging.')
            self._run('crossystem --all')
            raise ChromiumOSError(rollback_message)

        # Make sure chromeos-setgoodkernel runs.
        try:
            utils.poll_for_condition(
                lambda: (self.get_kernel_tries(active_kernel_state) == 0
                         and self.get_kernel_success(active_kernel_state)),
                exception=ChromiumOSError(),
                timeout=self.KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
        except ChromiumOSError:
            # Tell apart "system services never came up" from "the kernel
            # was never marked good" to give a more useful error.
            services_status = self._run('status system-services').stdout
            if services_status != 'system-services start/running\n':
                event = 'Chrome failed to reach login screen'
            else:
                event = ('update-engine failed to call '
                         'chromeos-setgoodkernel')
            raise ChromiumOSError(
                    'After update and reboot, %s '
                    'within %d seconds' % (event,
                                           self.KERNEL_UPDATE_TIMEOUT))
651
652
class BrilloUpdater(BaseUpdater):
    """Updater for Brillo DUTs, built on the shared BaseUpdater logic."""

    def __init__(self, update_url, host=None):
        """Set up the updater with the Brillo update_engine_client path.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        """
        updater_ctrl_bin = '/system/bin/update_engine_client'
        super(BrilloUpdater, self).__init__(updater_ctrl_bin, update_url,
                                            host)
664