1# Copyright 2017 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import pprint
7import time
8import re
9
10from autotest_lib.client.common_lib import error
11from autotest_lib.server import autotest
12from autotest_lib.server.cros.faft.cr50_test import Cr50Test
13
14
15class firmware_Cr50DeviceState(Cr50Test):
16    """Verify Cr50 tracks the EC and AP state correctly.
17
18    Put the device through S0, S0ix, S3, and G3. Cr50 responds to these state
19    changes by enabling/disabling uart and changing its suspend type. Verify
20    that none of these cause any interrupt storms on Cr50. Make sure that there
21    aren't any interrupt storms and that Cr50 enters regular or deep sleep a
22    reasonable amount of times.
23    """
24    version = 1
25
26    DEEP_SLEEP_STEP_SUFFIX = ' Num Deep Sleep Steps'
27
28    KEY_CMD_END_TIME = -4
29    # Use negative numbers to keep track of counts not in the IRQ list.
30    KEY_DEEP_SLEEP = -3
31    KEY_TIME = -2
32    KEY_RESET = -1
33    KEY_REGULAR_SLEEP = 112
34    INT_NAME = {
35        KEY_RESET  : 'Reset Count',
36        KEY_DEEP_SLEEP  : 'Deep Sleep Count',
37        KEY_TIME  : 'Cr50 Time',
38        4 : 'HOST_CMD_DONE',
39        81  : 'GPIO0',
40        98  : 'GPIO1',
41        103 : 'I2CS WRITE',
42        KEY_REGULAR_SLEEP : 'PMU WAKEUP',
43        113 : 'AC present FED',
44        114 : 'AC present RED',
45        124 : 'RBOX_INTR_PWRB',
46        130 : 'SPS CS deassert',
47        138 : 'SPS RXFIFO LVL',
48        159 : 'SPS RXFIFO overflow',
49        160 : 'EVENT TIMER',
50        174 : 'CR50_RX_SERVO_TX',
51        177 : 'CR50_TX_SERVO_RX',
52        181 : 'AP_TX_CR50_RX',
53        184 : 'AP_RX_CR50_TX',
54        188 : 'EC_TX_CR50_RX',
55        191 : 'EC_RX_CR50_TX',
56        193 : 'USB',
57    }
58    IGNORED_KEYS = [KEY_CMD_END_TIME]
59    SLEEP_KEYS = [ KEY_REGULAR_SLEEP, KEY_DEEP_SLEEP ]
60    # USB, AP UART, and EC UART should be disabled if ccd is disabled.
61    CCD_KEYS = [ 181, 184, 188, 191, 193 ]
62
63    # Cr50 won't enable any form of sleep until it has been up for 20 seconds.
64    SLEEP_DELAY = 20
65    # The time in seconds to wait in each state. Wait one minute so it's long
66    # enough for cr50 to settle into whatever state. 60 seconds is also long
67    # enough that cr50 has enough time to enter deep sleep twice, so we can
68    # catch extra wakeups.
69    SLEEP_TIME = 60
70    SHORT_WAIT = 5
71    CONSERVATIVE_WAIT_TIME = SLEEP_TIME * 2
72    # Cr50 should wake up twice per second while in regular sleep
73    SLEEP_RATE = 2
74
75    DEEP_SLEEP_MAX = 2
76    ARM = 'ARM '
77    # If there are over 100,000 interrupts, it is an interrupt storm.
78    DEFAULT_COUNTS = [0, 100000]
79    # A dictionary of ok count values for each irq that shouldn't follow the
80    # DEFAULT_COUNTS range.
81    EXPECTED_IRQ_COUNT_RANGE = {
82        KEY_RESET : [0, 0],
83        KEY_DEEP_SLEEP : [0, DEEP_SLEEP_MAX],
84        KEY_TIME : [0, CONSERVATIVE_WAIT_TIME],
85        'S0ix' + DEEP_SLEEP_STEP_SUFFIX : [0, 0],
86        # Cr50 may enter deep sleep an extra time, because of how the test
87        # collects taskinfo counts. Just verify that it does enter deep sleep
88        'S3' + DEEP_SLEEP_STEP_SUFFIX : [1, 2],
89        'G3' + DEEP_SLEEP_STEP_SUFFIX : [1, 2],
90        # ARM devices don't enter deep sleep in S3
91        ARM + 'S3' + DEEP_SLEEP_STEP_SUFFIX : [0, 0],
92        ARM + 'G3' + DEEP_SLEEP_STEP_SUFFIX : [1, 2],
93        # Regular sleep is calculated based on the cr50 time
94    }
95
96    # Each line relevant taskinfo output should be 13 characters long with only
97    # digits or spaces. Use this information to make sure every taskinfo command
98    # gets the full relevant output. There are 4 characters for the irq number
99    # and 9 for the count.
100    GET_TASKINFO = ['IRQ counts by type:\s+(([\d ]{13}\r\n)+)Service calls']
101
102    START = ''
103    INCREASE = '+'
104    DS_RESUME = 'DS'
105
106    MEM_SLEEP_PATH = '/sys/power/mem_sleep'
107    MEM_SLEEP_S0IX = 'echo %s > %s ; sleep 1' % ('s2idle', MEM_SLEEP_PATH)
108    MEM_SLEEP_S3 = 'echo %s > %s ; sleep 1' % ('deep', MEM_SLEEP_PATH)
109    POWER_STATE_PATH = '/sys/power/state'
110    POWER_STATE_S0IX = 'echo %s > %s' % ('freeze', POWER_STATE_PATH)
111    POWER_STATE_S3 = 'echo %s > %s' % ('mem', POWER_STATE_PATH)
112
113
114    def initialize(self, host, cmdline_args, full_args):
115        super(firmware_Cr50DeviceState, self).initialize(host, cmdline_args,
116                                                         full_args)
117        # Don't bother if there is no Chrome EC or if EC hibernate doesn't work.
118        if not self.check_ec_capability():
119            raise error.TestNAError("Nothing needs to be tested on this device")
120
121        self.generate_suspend_commands()
122
123
124    def generate_suspend_commands(self):
125        """Generate the S3 and S0ix suspend commands"""
126        s0ix_cmds = []
127        s3_cmds = []
128        if self.host.path_exists(self.MEM_SLEEP_PATH):
129            s0ix_cmds.append(self.MEM_SLEEP_S0IX)
130            s3_cmds.append(self.MEM_SLEEP_S3)
131        s0ix_cmds.append(self.POWER_STATE_S0IX)
132        s3_cmds.append(self.POWER_STATE_S3)
133        self._s0ix_cmds = '; '.join(s0ix_cmds)
134        self._s3_cmds = '; '.join(s3_cmds)
135        logging.info('S0ix cmd: %r', self._s0ix_cmds)
136        logging.info('S3 cmd: %r', self._s3_cmds)
137
138
139    def log_sleep_debug_information(self):
140        """Log some information used for debugging sleep issues"""
141        logging.debug(
142            self.cr50.send_command_retry_get_output('sleepmask',
143                                                    ['sleepmask.*>'],
144                                                    safe=True)[0])
145        logging.debug(
146            self.cr50.send_command_retry_get_output('sysinfo',
147                                                    ['sysinfo.*>'],
148                                                    safe=True)[0])
149
150
151    def get_taskinfo_output(self):
152        """Return a dict with the irq numbers as keys and counts as values"""
153        output = self.cr50.send_command_retry_get_output('taskinfo',
154            self.GET_TASKINFO, safe=True, retries=10)[0][1].strip()
155        logging.debug(output)
156        return output
157
158
159    def get_irq_counts(self):
160        """Return a dict with the irq numbers as keys and counts as values"""
161        irq_counts = { self.KEY_REGULAR_SLEEP : 0 }
162        # Running all of these commands may take a while. Track how much time
163        # commands are running, so we can offset the cr50 time to set sleep
164        # expectations
165        start_cmd_time = int(self.cr50.gettime())
166        irq_counts[self.KEY_TIME] = start_cmd_time
167
168        output = self.get_taskinfo_output()
169        irq_list = re.findall('\d+\s+\d+\r', output)
170        # Make sure the regular sleep irq is in the dictionary, even if cr50
171        # hasn't seen any interrupts. It's important the test sees that there's
172        # never an interrupt.
173        for irq_info in irq_list:
174            logging.debug(irq_info)
175            num, count = irq_info.split()
176            irq_counts[int(num)] = int(count)
177        irq_counts[self.KEY_RESET] = int(self.cr50.get_reset_count())
178        irq_counts[self.KEY_DEEP_SLEEP] = int(self.cr50.get_deep_sleep_count())
179        # Log some information, so we can debug issues with sleep.
180        self.log_sleep_debug_information()
181        # Track when the commands end, so the test can ignore the time spent
182        # running these console commands.
183        end_cmd_time = int(self.cr50.gettime())
184        irq_counts[self.KEY_CMD_END_TIME] = end_cmd_time
185        logging.info('Commands finished in %d seconds',
186                     end_cmd_time - start_cmd_time)
187        return irq_counts
188
189
190    def get_expected_count(self, irq_key, cr50_time, idle):
191        """Get the expected irq increase for the given irq and state
192
193        Args:
194            irq_key: the irq int
195            cr50_time: the cr50 time in seconds
196            idle: Cr50 should enter regular sleep while the device is idle.
197
198        Returns:
199            A list with the expected irq count range [min, max]
200        """
201        # CCD will prevent sleep
202        if self.ccd_enabled and (irq_key in self.SLEEP_KEYS or
203            self.DEEP_SLEEP_STEP_SUFFIX in str(irq_key)):
204            return [0, 0]
205        if irq_key == self.KEY_REGULAR_SLEEP:
206            # If cr50_time is really low, we probably woke cr50 up using
207            # taskinfo, which would be a pmu wakeup.
208            if cr50_time < 5:
209                return [0, 1]
210            # Only enforce the minimum regular sleep count if the device is
211            # idle. Cr50 may not enter regular sleep during power state
212            # transitions.
213            if idle:
214                min_count = max(cr50_time - self.SLEEP_DELAY, 0)
215            else:
216                min_count = 0
217            # Check that cr50 isn't continuously entering and exiting sleep.
218            # The PMU wakeups should happen around twice a second. Depending
219            # on TPM activity it may occur more often. Add 2 to the multiplier
220            # to allow for extra wakeups. This is mostly to catch issues that
221            # cause cr50 to wake up 100 times a second
222            max_count = cr50_time * (self.SLEEP_RATE + 2)
223            return [min_count, max_count]
224        # If ccd is disabled, ccd irq counts should not increase.
225        if not self.ccd_enabled and (irq_key in self.CCD_KEYS):
226            return [0, 0]
227        return self.EXPECTED_IRQ_COUNT_RANGE.get(irq_key, self.DEFAULT_COUNTS)
228
229
230    def check_increase(self, irq_key, name, increase, expected_range):
231        """Verify the irq count is within the expected range
232
233        Args:
234            irq_key: the irq int
235            name: the irq name string
236            increase: the irq count
237            expected_range: A list with the valid irq count range [min, max]
238
239        Returns:
240            '' if increase is in the given range. If the increase isn't in the
241            range, it returns an error message.
242        """
243        min_count, max_count = expected_range
244        if min_count > increase or max_count < increase:
245            err_msg = '%s %s: %s not in range %s' % (name, irq_key, increase,
246                expected_range)
247            return err_msg
248        return ''
249
250
251    def get_step_events(self):
252        """Use the deep sleep counts to determine the step events"""
253        ds_counts = self.get_irq_step_counts(self.KEY_DEEP_SLEEP)
254        events = []
255        for i, count in enumerate(ds_counts):
256            if not i:
257                events.append(self.START)
258            elif count != ds_counts[i - 1]:
259                # If the deep sleep count changed, Cr50 recovered deep sleep
260                # and the irq counts are reset.
261                events.append(self.DS_RESUME)
262            else:
263                events.append(self.INCREASE)
264        return events
265
266
267    def get_irq_step_counts(self, irq_key):
268        """Get a list of the all of the step counts for the given irq"""
269        return [ irq_dict.get(irq_key, 0) for irq_dict in self.steps ]
270
271
272    def check_for_errors(self, state):
273        """Check for unexpected IRQ counts at each step.
274
275        Find the irq count errors and add them to run_errors.
276
277        Args:
278            state: The power state: S0, S0ix, S3, or G3.
279        """
280        num_steps = len(self.steps)
281        # Get all of the deep sleep counts
282        events = self.get_step_events()
283
284        irq_list = list(self.irqs)
285        irq_list.sort()
286
287        # Pad the start of the step names, so the names align with each step
288        # count.
289        irq_diff = ['%24s|%s|' % ('', '|'.join(self.step_names))]
290        step_errors = [ [] for i in range(num_steps) ]
291
292        end_cmd_times = self.get_irq_step_counts(self.KEY_CMD_END_TIME)
293        start_cmd_times = self.get_irq_step_counts(self.KEY_TIME)
294        cr50_time_diff = []
295        for i, start_time in enumerate(start_cmd_times):
296            # The test collects a lot of information using console commands.
297            # These take a long time to run and cr50 isn't idle. The idle
298            # time is the time this step started minus the time the last set of
299            # commands finished running.
300            if events[i] == self.INCREASE:
301                cr50_time_diff.append(start_time - end_cmd_times[i - 1])
302            else:
303                cr50_time_diff.append(start_time)
304
305        # Go through each irq and update its info in the progress dict
306        for irq_key in irq_list:
307            if irq_key in self.IGNORED_KEYS:
308                continue
309            name = self.INT_NAME.get(irq_key, 'Unknown')
310            # Print the IRQ name on the left of the column and the irq number
311            # on the right.
312            irq_progress_str = ['%-19s %3s' % (name, irq_key)]
313
314            irq_counts = self.get_irq_step_counts(irq_key)
315            for step, count in enumerate(irq_counts):
316                event = events[step]
317
318                # The deep sleep counts are not reset after deep sleep. Change
319                # the event to INCREASE.
320                if irq_key == self.KEY_DEEP_SLEEP and event == self.DS_RESUME:
321                    event = self.INCREASE
322
323                if event == self.INCREASE:
324                    count -= irq_counts[step - 1]
325
326                # Check that the count increase is within the expected value.
327                if event != self.START:
328                    step_name = self.step_names[step].strip()
329                    # TODO(b/153891388): raise actual error once the servo
330                    # character loss issue is fixed.
331                    if count < 0:
332                        raise error.TestNAError('%s test found negative %s '
333                                                'count %r (likely due to servo '
334                                                'dropping characters)' %
335                                                (step, step_name, count))
336                    expected_range = self.get_expected_count(irq_key,
337                            cr50_time_diff[step], idle='idle' in step_name)
338
339                    rv = self.check_increase(irq_key, name, count,
340                            expected_range)
341                    if rv:
342                        logging.info('Unexpected count in %s test: %s %s',
343                                     state, step_name, rv)
344                        step_errors[step].append(rv)
345
346                irq_progress_str.append(' %2s %8d' % (event, count))
347
348            # Separate each step count with '|'. Add '|' to the start and end of
349            # the line.
350            irq_diff.append('|%s|' % '|'.join(irq_progress_str))
351
352        errors = {}
353
354        ds_key = self.ARM if self.is_arm else ''
355        ds_key += state + self.DEEP_SLEEP_STEP_SUFFIX
356        expected_range = self.get_expected_count(ds_key, 0, False)
357        rv = self.check_increase(None, ds_key, events.count(self.DS_RESUME),
358                expected_range)
359        if rv:
360            logging.info('Unexpected count for %s %s', state, rv)
361            errors[ds_key] = rv
362        for i, step_error in enumerate(step_errors):
363            if step_error:
364                logging.error('Step %d errors:\n%s', i,
365                        pprint.pformat(step_error))
366                step = '%s step %d %s' % (state, i, self.step_names[i].strip())
367                errors[step] = step_error
368
369        logging.info('DIFF %s IRQ Counts:\n%s', state, '\n'.join(irq_diff))
370        if errors:
371            logging.info('ERRORS %s IRQ Counts:\n%s', state,
372                    pprint.pformat(errors))
373            self.run_errors.update(errors)
374
375
376    def ap_is_on_after_power_button_press(self):
377        """Returns True if the AP is on after pressing the power button"""
378        # TODO (mruthven): use self.servo.power_short_press() once kukui power
379        # button issues are figured out.
380        self.servo.power_key(1)
381        # Give the AP some time to turn on
382        time.sleep(self.cr50.SHORT_WAIT)
383        return self.cr50.ap_is_on()
384
385
386    def trigger_s0(self):
387        """Press the power button so the DUT will wake up."""
388        if self.ap_is_on_after_power_button_press():
389            return
390        # Try a second time to see if the AP comes up.
391        if not self.ap_is_on_after_power_button_press():
392            raise error.TestError('Could not wake the AP using power button')
393        logging.warning('Had to press power button twice to wake the AP')
394
395
396    def enter_state(self, state):
397        """Get the command to enter the power state"""
398        block = True
399        if state == 'S0':
400            self.trigger_s0()
401        else:
402            if state == 'S0ix':
403                full_command = self._s0ix_cmds
404                block = False
405            elif state == 'S3':
406                full_command = self._s3_cmds
407                block = False
408            elif state == 'G3':
409                full_command = 'poweroff'
410            self.faft_client.system.run_shell_command(full_command, block)
411
412        time.sleep(self.SHORT_WAIT);
413        # check state transition
414        if not self.wait_power_state(state, self.SHORT_WAIT):
415            raise error.TestFail('Platform failed to reach %s state.' % state)
416
417
418    def stage_irq_add(self, irq_dict, name=''):
419        """Add the current irq counts to the stored dictionary of irq info"""
420        self.steps.append(irq_dict)
421        self.step_names.append(name.center(12))
422        self.irqs.update(irq_dict.keys())
423        logging.info('%s:\n%s', name, pprint.pformat(irq_dict))
424
425
426    def reset_irq_counts(self):
427        """Reset the test IRQ counts"""
428        self.steps = []
429        self.step_names = []
430        self.irqs = set()
431
432
433    def run_transition(self, state):
434        """Enter the given power state and reenter s0
435
436        Enter the power state and return to S0. Wait long enough to ensure cr50
437        will enter sleep mode, so we can verify that as well.
438
439        Args:
440            state: the power state: S0ix, S3, or G3
441        """
442        self.reset_irq_counts()
443
444        # Enter the given state
445        self.enter_state(state)
446        self.stage_irq_add(self.get_irq_counts(), 'entered %s' % state)
447
448        logging.info('waiting %d seconds', self.SLEEP_TIME)
449        time.sleep(self.SLEEP_TIME)
450        # Nothing is really happening. Cr50 should basically be idle during
451        # SLEEP_TIME.
452        self.stage_irq_add(self.get_irq_counts(), 'idle in %s' % state)
453
454        # Return to S0
455        self.enter_state('S0')
456        self.stage_irq_add(self.get_irq_counts(), 'entered S0')
457
458        logging.info('waiting %d seconds', self.SLEEP_TIME)
459        time.sleep(self.SLEEP_TIME)
460
461        self.stage_irq_add(self.get_irq_counts(), 'idle in S0')
462
463
464    def verify_state(self, state):
465        """Verify cr50 behavior while running through the power state"""
466
467        try:
468            self.run_transition(state)
469        finally:
470            # reset the system to S0 no matter what happens
471            self.trigger_s0()
472
473        # Check that the progress of the irq counts seems reasonable
474        self.check_for_errors(state)
475
476
477    def is_arm_family(self):
478        """Returns True if the DUT is an ARM device."""
479        arch = self.host.run('arch').stdout.strip()
480        return arch in ['aarch64', 'armv7l']
481
482
483    def run_through_power_states(self):
484        """Go through S0ix, S3, and G3. Verify there are no interrupt storms"""
485        self._try_to_bring_dut_up()
486        self.run_errors = {}
487        self.ccd_str = 'ccd ' + ('enabled' if self.ccd_enabled else 'disabled')
488        logging.info('Running through states with %s', self.ccd_str)
489
490        self.cr50.get_ccdstate()
491        if not self.cr50.get_sleepmask() and self.ccd_enabled:
492            logging.info('Sleepmask is not keeping cr50 up with ccd enabled')
493            self.all_errors[self.ccd_str] = 'usb is not active with ccd enabled'
494            return
495
496        # Login before entering S0ix so cr50 will be able to enter regular sleep
497        client_at = autotest.Autotest(self.host)
498        client_at.run_test('login_LoginSuccess')
499
500        # Check if the device supports S0ix. The exit status will be 0 if it
501        # does 1 if it doesn't.
502        result = self.host.run('check_powerd_config --suspend_to_idle',
503                ignore_status=True)
504        if not result.exit_status:
505            self.verify_state('S0ix')
506
507        # Enter S3
508        self.verify_state('S3')
509
510        # Enter G3
511        self.verify_state('G3')
512        if self.run_errors:
513            self.all_errors[self.ccd_str] = self.run_errors
514
515
516    def run_once(self, host):
517        """Go through S0ix, S3, and G3. Verify there are no interrupt storms"""
518        self.all_errors = {}
519        self.host = host
520        self.is_arm = self.is_arm_family()
521        supports_dts_control = self.cr50.servo_dts_mode_is_valid()
522
523        if supports_dts_control:
524            self.cr50.ccd_disable(raise_error=True)
525
526        self.ccd_enabled = self.cr50.ccd_is_enabled()
527        self.run_through_power_states()
528
529        if supports_dts_control:
530            ccd_was_enabled = self.ccd_enabled
531            self.cr50.ccd_enable(raise_error=supports_dts_control)
532            self.ccd_enabled = self.cr50.ccd_is_enabled()
533            # If the first run had ccd disabled, and the test was able to enable
534            # ccd, run through the states again to make sure there are no issues
535            # come up when ccd is enabled.
536            if not ccd_was_enabled and self.ccd_enabled:
537                self.run_through_power_states()
538        else:
539            logging.info('Current setup only supports test with ccd %sabled.',
540                    'en' if self.ccd_enabled else 'dis')
541
542        self.trigger_s0()
543        if self.all_errors:
544            raise error.TestFail('Unexpected Device State: %s' %
545                    self.all_errors)
546        if not supports_dts_control:
547            raise error.TestNAError('Verified device state with %s. Please '
548                    'run with type c servo v4 to test full device state.' %
549                    self.ccd_str)
550