1# Copyright 2014 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging, threading, time
6
7from autotest_lib.client.bin import utils
8from autotest_lib.client.cros import service_stopper
9
10
# Names of the thermal throttling services that get disabled while
# performance is being measured:
# - temp_metrics is used on link.
# - thermal is used on daisy, snow, pit etc.
# TODO(ihf): cpu_quiet on nyan isn't a service. We still need to disable it
#            on nyan. See crbug.com/357457.
_THERMAL_SERVICES = [
    'temp_metrics',
    'thermal',
]
17
18
class PerfControl(object):
    """
    Provides methods for setting the performance mode of a device.

    In particular it verifies the machine is idle and cold and tries to set
    it into a consistent, high performance state during initialization.

    Furthermore it monitors the state of the machine (in particular
    temperature) and verifies that nothing bad happened along the way.

    The temperature is sampled by a background daemon thread that is started
    by the constructor and stopped when the context manager exits.

    Example usage:

    with PerfControl() as pc:
        if not pc.verify_is_valid():
            raise error.TestError(pc.get_error_reason())
        # Do all performance testing.
        ...
        if not pc.verify_is_valid():
            raise error.TestError(pc.get_error_reason())
    """
    # Upper bound in seconds on how long __exit__ waits for the monitoring
    # thread to finish; a single in-flight temperature read may delay it.
    _MONITOR_JOIN_TIMEOUT_SECONDS = 5.0


    def __init__(self):
        self._service_stopper = None
        # Monitoring thread bookkeeping; the event is set to request a stop.
        self._monitor_thread = None
        self._stop_monitor = threading.Event()
        # Keep a copy of the current state for cleanup.
        self._temperature_init = utils.get_current_temperature_max()
        self._temperature_critical = utils.get_temperature_critical()
        # Give these attributes a value on every path so that an early return
        # below never leaves the instance partially initialized.
        self._temperature_cold = None
        self._temperature_max = self._temperature_init
        self._original_governors = utils.set_high_performance_mode()
        self._error_reason = None
        if not utils.wait_for_idle_cpu(60.0, 0.1):
            self._error_reason = 'Could not get idle CPU.'
            return
        if not utils.wait_for_cool_machine():
            self._error_reason = 'Could not get cold machine.'
            return
        self._temperature_cold = utils.get_current_temperature_max()
        self._temperature_max = self._temperature_cold
        self._monitor_thread = threading.Thread(
                target=self._monitor_performance_state)
        # Daemon thread: a PerfControl that is never exited (or a failure
        # further down) must not keep the interpreter from shutting down.
        self._monitor_thread.daemon = True
        self._monitor_thread.start()
        # Should be last just in case we had a runaway process.
        self._stop_thermal_throttling()


    def __enter__(self):
        """Returns this instance; all setup already happened in __init__."""
        return self


    def __exit__(self, _type, value, traceback):
        """
        Restores thermal throttling and the scaling governors and stops the
        temperature monitoring thread.

        Each cleanup step runs even if an earlier one raised. Exceptions
        raised inside the with-block are not suppressed.
        """
        try:
            # First thing restart thermal management.
            self._restore_thermal_throttling()
        finally:
            try:
                utils.restore_scaling_governor_states(
                        self._original_governors)
            finally:
                self._stop_monitoring()


    def get_error_reason(self):
        """
        Returns an error reason string if we encountered problems to pass
        on to harness/wmatrix. Returns None if no problem was recorded.
        """
        return self._error_reason


    def verify_is_valid(self):
        """
        For now we declare performance results as valid if
        - we did not have an error before.
        - the monitoring thread never saw temperatures too close to critical.

        As a side effect, records an error reason when the maximum observed
        temperature exceeded the limit.

        TODO(ihf): Search log files for thermal throttling messages like in
                   src/build/android/pylib/perf/thermal_throttle.py

        @returns True if results are considered valid, False otherwise.
        """
        if self._error_reason:
            return False
        # Anything within one degree of the critical temperature is bad.
        temperature_bad = self._temperature_critical - 1.0
        logging.info("Max observed temperature = %.1f'C (bad limit = %.1f'C)",
                     self._temperature_max, temperature_bad)
        if self._temperature_max > temperature_bad:
            self._error_reason = 'Machine got hot during testing.'
            return False
        return True


    def _monitor_performance_state(self):
        """
        Checks machine temperature once per second until a stop is requested
        through self._stop_monitor, tracking the maximum value seen.

        TODO(ihf): make this more intelligent with regards to governor,
                   CPU, GPU and maybe zram as needed.
        """
        while True:
            # Acts as the one second sleep between samples, but wakes up
            # right away when the stop event gets set.
            self._stop_monitor.wait(1)
            if self._stop_monitor.is_set():
                return
            current_temperature = utils.get_current_temperature_max()
            self._temperature_max = max(self._temperature_max,
                                        current_temperature)
            # TODO(ihf): Remove this spew once PerfControl is stable.
            logging.info('PerfControl CPU temperature = %.1f',
                         current_temperature)


    def _stop_monitoring(self):
        """
        Asks the monitoring thread to stop and waits a bounded time for it.
        Safe to call when the thread was never started and safe to call more
        than once.
        """
        self._stop_monitor.set()
        if self._monitor_thread is not None:
            self._monitor_thread.join(self._MONITOR_JOIN_TIMEOUT_SECONDS)
            self._monitor_thread = None


    def _stop_thermal_throttling(self):
        """
        If exist on the platform/machine it stops the different thermal
        throttling scripts from running.
        Warning: this risks abnormal behavior if machine runs in high load.
        """
        # NOTE(review): this only constructs the ServiceStopper. Confirm
        # against service_stopper that construction alone stops the services,
        # or whether an explicit stop call is required here.
        self._service_stopper = service_stopper.ServiceStopper(
                                                    _THERMAL_SERVICES)


    def _restore_thermal_throttling(self):
        """
        Restores the original thermal throttling state. Does nothing if
        thermal throttling was never stopped.
        """
        if self._service_stopper:
            self._service_stopper.restore_services()
129