1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import collections
6import json
7import logging
8import os
9import urlparse
10
11from autotest_lib.client.common_lib import error
12from autotest_lib.client.common_lib.cros import dev_server
13from autotest_lib.server.cros.update_engine import chromiumos_test_platform
14from autotest_lib.server.cros.update_engine import update_engine_test
15
16
17def snippet(text):
18    """Returns the text with start/end snip markers around it.
19
20    @param text: The snippet text.
21
22    @return The text with start/end snip markers around it.
23    """
24    snip = '---8<---' * 10
25    start = '-- START -'
26    end = '-- END -'
27    return ('%s%s\n%s\n%s%s' %
28            (start, snip[len(start):], text, end, snip[len(end):]))
29
30
31class autoupdate_EndToEndTest(update_engine_test.UpdateEngineTest):
32    """Complete update test between two Chrome OS releases.
33
34    Performs an end-to-end test of updating a ChromeOS device from one version
35    to another. The test performs the following steps:
36
37      1. Stages the source (full) and target update payload on the central
38         devserver.
39      2. Installs a source image on the DUT (if provided) and reboots to it.
40      3. Then starts the target update by calling cros_au RPC on the devserver.
41      4. This call copies the devserver code and all payloads to the DUT.
42      5. Starts a devserver on the DUT.
43      6. Starts an update pointing to this localhost devserver.
44      7. Watches as the DUT applies the update to rootfs and stateful.
45      8. Reboots and repeats steps 5-6, ensuring that the next update check
46         shows the new image version.
47      9. Returns the hostlogs collected during each update check for
48         verification against expected update events.
49
50    This class interacts with several others:
51    UpdateEngineTest: base class for comparing expected update events against
52                      the events listed in the hostlog.
53    UpdateEngineEvent: class representing a single expected update engine event.
54    ChromiumOSTestPlatform: A class representing the Chrome OS device we are
55                            updating. It has functions for things the DUT can
56                            do: get logs, reboot, start update etc
57
58    The flow is like this: this class stages the payloads on
59    the devserver and then controls the flow of the test. It tells
60    ChromiumOSTestPlatform to start the update. When that is done updating, it
61    asks UpdateEngineTest to compare the update that just completed with an
62    expected update.
63
64    Some notes on naming:
65      devserver: Refers to a machine running the Chrome OS Update Devserver.
66      autotest_devserver: An autotest wrapper to interact with a devserver.
67                          Can be used to stage artifacts to a devserver. We
68                          will also class cros_au RPC on this devserver to
69                          start the update.
70      staged_url's: In this case staged refers to the fact that these items
71                     are available to be downloaded statically from these urls
72                     e.g. 'localhost:8080/static/my_file.gz'. These are usually
73                     given after staging an artifact using a autotest_devserver
74                     though they can be re-created given enough assumptions.
75    """
76    version = 1
77
78    # Timeout periods, given in seconds.
79    _WAIT_FOR_INITIAL_UPDATE_CHECK_SECONDS = 12 * 60
80    # TODO(sosa): Investigate why this needs to be so long (this used to be
81    # 120 and regressed).
82    _WAIT_FOR_DOWNLOAD_STARTED_SECONDS = 4 * 60
83    # See https://crbug.com/731214 before changing WAIT_FOR_DOWNLOAD
84    _WAIT_FOR_DOWNLOAD_COMPLETED_SECONDS = 20 * 60
85    _WAIT_FOR_UPDATE_COMPLETED_SECONDS = 4 * 60
86    _WAIT_FOR_UPDATE_CHECK_AFTER_REBOOT_SECONDS = 15 * 60
87
88    _DEVSERVER_HOSTLOG_ROOTFS = 'devserver_hostlog_rootfs'
89    _DEVSERVER_HOSTLOG_REBOOT = 'devserver_hostlog_reboot'
90
91    StagedURLs = collections.namedtuple('StagedURLs', ['source_url',
92                                                       'source_stateful_url',
93                                                       'target_url',
94                                                       'target_stateful_url'])
95
96    def initialize(self):
97        """Sets up variables that will be used by test."""
98        super(autoupdate_EndToEndTest, self).initialize()
99        self._host = None
100        self._autotest_devserver = None
101
102    def _get_least_loaded_devserver(self, test_conf):
103        """Find a devserver to use.
104
105        We first try to pick a devserver with the least load. In case all
106        devservers' load are higher than threshold, fall back to
107        the old behavior by picking a devserver based on the payload URI,
108        with which ImageServer.resolve will return a random devserver based on
109        the hash of the URI. The picked devserver needs to respect the
110        location of the host if 'prefer_local_devserver' is set to True or
111        'restricted_subnets' is  set.
112
113        @param test_conf: a dictionary of test settings.
114        """
115        hostname = self._host.hostname if self._host else None
116        least_loaded_devserver = dev_server.get_least_loaded_devserver(
117            hostname=hostname)
118        if least_loaded_devserver:
119            logging.debug('Choosing the least loaded devserver: %s',
120                          least_loaded_devserver)
121            autotest_devserver = dev_server.ImageServer(least_loaded_devserver)
122        else:
123            logging.warning('No devserver meets the maximum load requirement. '
124                            'Picking a random devserver to use.')
125            autotest_devserver = dev_server.ImageServer.resolve(
126                test_conf['target_payload_uri'], self._host.hostname)
127        devserver_hostname = urlparse.urlparse(
128            autotest_devserver.url()).hostname
129
130        logging.info('Devserver chosen for this run: %s', devserver_hostname)
131        return autotest_devserver
132
133
134    def _stage_payloads_onto_devserver(self, test_conf):
135        """Stages payloads that will be used by the test onto the devserver.
136
137        @param test_conf: a dictionary containing payload urls to stage.
138
139        @return a StagedURLs tuple containing the staged urls.
140        """
141        logging.info('Staging images onto autotest devserver (%s)',
142                     self._autotest_devserver.url())
143
144        source_uri = test_conf['source_payload_uri']
145        staged_src_uri, staged_src_stateful = self._stage_payloads(
146            source_uri, test_conf['source_archive_uri'])
147
148        target_uri = test_conf['target_payload_uri']
149        staged_target_uri, staged_target_stateful = self._stage_payloads(
150            target_uri, test_conf['target_archive_uri'],
151            test_conf['update_type'])
152
153        return self.StagedURLs(staged_src_uri, staged_src_stateful,
154                               staged_target_uri, staged_target_stateful)
155
156
157    def _stage_payloads(self, payload_uri, archive_uri, payload_type='full'):
158        """Stages a payload and its associated stateful on devserver."""
159        if payload_uri:
160            staged_uri = self._stage_payload_by_uri(payload_uri)
161
162            # Figure out where to get the matching stateful payload.
163            if archive_uri:
164                stateful_uri = self._get_stateful_uri(archive_uri)
165            else:
166                stateful_uri = self._payload_to_stateful_uri(payload_uri)
167            staged_stateful = self._stage_payload_by_uri(stateful_uri)
168
169            logging.info('Staged %s payload from %s at %s.', payload_type,
170                         payload_uri, staged_uri)
171            logging.info('Staged stateful from %s at %s.', stateful_uri,
172                         staged_stateful)
173            return staged_uri, staged_stateful
174
175
176    @staticmethod
177    def _get_stateful_uri(build_uri):
178        """Returns a complete GS URI of a stateful update given a build path."""
179        return '/'.join([build_uri.rstrip('/'), 'stateful.tgz'])
180
181
182    def _payload_to_stateful_uri(self, payload_uri):
183        """Given a payload GS URI, returns the corresponding stateful URI."""
184        build_uri = payload_uri.rpartition('/')[0]
185        if build_uri.endswith('payloads'):
186            build_uri = build_uri.rpartition('/')[0]
187        return self._get_stateful_uri(build_uri)
188
189
190    def _get_hostlog_file(self, filename, pid):
191        """Return the hostlog file location.
192
193        This hostlog file contains the update engine events that were fired
194        during the update. The pid is returned from dev_server.auto_update().
195
196        @param filename: The partial filename to look for.
197        @param pid: The pid of the update.
198
199        """
200        hosts = [self._host.hostname, self._host.ip]
201        for host in hosts:
202            hostlog = '%s_%s_%s' % (filename, host, pid)
203            file_url = os.path.join(self.job.resultdir,
204                                    dev_server.AUTO_UPDATE_LOG_DIR,
205                                    hostlog)
206            if os.path.exists(file_url):
207                logging.info('Hostlog file to be used for checking update '
208                             'steps: %s', file_url)
209                return file_url
210        raise error.TestFail('Could not find %s for pid %s' % (filename, pid))
211
212
213    def _dump_update_engine_log(self, test_platform):
214        """Dumps relevant AU error log."""
215        try:
216            error_log = test_platform.get_update_log(80)
217            logging.error('Dumping snippet of update_engine log:\n%s',
218                          snippet(error_log))
219        except Exception:
220            # Mute any exceptions we get printing debug logs.
221            pass
222
223
224    def _report_perf_data(self, perf_file):
225        """Reports performance and resource data.
226
227        Currently, performance attributes are expected to include 'rss_peak'
228        (peak memory usage in bytes).
229
230        @param perf_file: A file with performance metrics.
231        """
232        logging.debug('Reading perf results from %s.', perf_file)
233        try:
234            with open(perf_file, 'r') as perf_file_handle:
235                perf_data = json.load(perf_file_handle)
236        except Exception as e:
237            logging.warning('Error while reading the perf data file: %s', e)
238            return
239
240        rss_peak = perf_data.get('rss_peak')
241        if rss_peak:
242            rss_peak_kib = rss_peak / 1024
243            logging.info('Peak memory (RSS) usage on DUT: %d KiB', rss_peak_kib)
244            self.output_perf_value(description='mem_usage_peak',
245                                   value=int(rss_peak_kib),
246                                   units='KiB',
247                                   higher_is_better=False)
248        else:
249            logging.warning('No rss_peak key in JSON returned by update '
250                            'engine perf script.')
251
252        update_time = perf_data.get('update_length')
253        if update_time:
254            logging.info('Time it took to update: %s seconds', update_time)
255            self.output_perf_value(description='update_length',
256                                   value=int(update_time), units='sec',
257                                   higher_is_better=False)
258        else:
259            logging.warning('No data about how long it took to update was '
260                            'found.')
261
262
263    def _verify_active_slot_changed(self, source_active_slot,
264                                    target_active_slot, source_release,
265                                    target_release):
266        """Make sure we're using a different slot after the update."""
267        if target_active_slot == source_active_slot:
268            err_msg = 'The active image slot did not change after the update.'
269            if source_release is None:
270                err_msg += (
271                    ' The DUT likely rebooted into the old image, which '
272                    'probably means that the payload we applied was '
273                    'corrupt.')
274            elif source_release == target_release:
275                err_msg += (' Given that the source and target versions are '
276                            'identical, either it (1) rebooted into the '
277                            'old image due to a bad payload or (2) we retried '
278                            'the update after it failed once and the second '
279                            'attempt was written to the original slot.')
280            else:
281                err_msg += (' This is strange since the DUT reported the '
282                            'correct target version. This is probably a system '
283                            'bug; check the DUT system log.')
284            raise error.TestFail(err_msg)
285
286        logging.info('Target active slot changed as expected: %s',
287                     target_active_slot)
288
289
290    def _verify_version(self, expected, actual):
291        """Compares actual and expected versions."""
292        if expected != actual:
293            err_msg = 'Failed to verify OS version. Expected %s, was %s' % (
294                expected, actual)
295            logging.error(err_msg)
296            raise error.TestFail(err_msg)
297
298    def run_update_test(self, cros_device, test_conf):
299        """Runs the update test, collects perf stats, checks expected version.
300
301        @param cros_device: The device under test.
302        @param test_conf: A dictionary containing test configuration values.
303
304        """
305        # Record the active root partition.
306        source_active_slot = cros_device.get_active_slot()
307        logging.info('Source active slot: %s', source_active_slot)
308
309        source_release = test_conf['source_release']
310        target_release = test_conf['target_release']
311
312        cros_device.copy_perf_script_to_device(self.bindir)
313        try:
314            # Update the DUT to the target image.
315            pid = cros_device.install_target_image(test_conf[
316                'target_payload_uri'])
317
318            # Verify the hostlog of events was returned from the update.
319            file_url = self._get_hostlog_file(self._DEVSERVER_HOSTLOG_ROOTFS,
320                                              pid)
321
322            # Call into base class to compare expected events against hostlog.
323            self.verify_update_events(source_release, file_url)
324        except:
325            logging.fatal('ERROR: Failure occurred during the target update.')
326            raise
327
328        # Collect perf stats about this update run.
329        perf_file = cros_device.get_perf_stats_for_update(self.job.resultdir)
330        if perf_file is not None:
331            self._report_perf_data(perf_file)
332
333        # Device is updated. Check that we are running the expected version.
334        if cros_device.oobe_triggers_update():
335            # If DUT automatically checks for update during OOBE (e.g
336            # rialto), this update check fires before the test can get the
337            # post-reboot update check. So we just check the version from
338            # lsb-release.
339            logging.info('Skipping post reboot update check.')
340            self._verify_version(target_release, cros_device.get_cros_version())
341        else:
342            # Verify we have a hostlog for the post-reboot update check.
343            file_url = self._get_hostlog_file(self._DEVSERVER_HOSTLOG_REBOOT,
344                                              pid)
345
346            # Call into base class to compare expected events against hostlog.
347            self.verify_update_events(source_release, file_url, target_release)
348
349        self._verify_active_slot_changed(source_active_slot,
350                                         cros_device.get_active_slot(),
351                                         source_release, target_release)
352
353        logging.info('Update successful, test completed')
354
355
356    def run_once(self, host, test_conf):
357        """Performs a complete auto update test.
358
359        @param host: a host object representing the DUT.
360        @param test_conf: a dictionary containing test configuration values.
361
362        @raise error.TestError if anything went wrong with setting up the test;
363               error.TestFail if any part of the test has failed.
364        """
365        self._host = host
366        logging.debug('The test configuration supplied: %s', test_conf)
367
368        self._autotest_devserver = self._get_least_loaded_devserver(test_conf)
369        self._stage_payloads_onto_devserver(test_conf)
370
371        # Get an object representing the CrOS DUT.
372        cros_device = chromiumos_test_platform.ChromiumOSTestPlatform(
373            self._host, self._autotest_devserver, self.job.resultdir)
374
375        # Install source image
376        cros_device.install_source_image(test_conf['source_payload_uri'])
377        cros_device.check_login_after_source_update()
378
379        # Start the update to the target image.
380        try:
381            self.run_update_test(cros_device, test_conf)
382        except update_engine_test.UpdateEngineEventMissing:
383            self._dump_update_engine_log(cros_device)
384            raise
385
386        # Check we can login after the update.
387        cros_device.check_login_after_target_update()
388