1# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import glob, logging, os, tempfile, threading, time
6from autotest_lib.client.bin import test
7from autotest_lib.client.common_lib import error, utils
8
class PlatformDescriptor(object):
    '''
    Plain container for the platform parameters the test relies on.

    @num_cores - how many CPU cores the platform has
    @max_cpu_freq - highest frequency the CPU can be clocked at
    @min_cpu_freq - lowest frequency the CPU can be clocked at
    '''

    def __init__(self, num_cores, max_cpu_freq, min_cpu_freq):
        # The values are stored exactly as passed in, no validation or
        # conversion takes place.
        (self.num_cores,
         self.max_cpu_freq,
         self.min_cpu_freq) = (num_cores, max_cpu_freq, min_cpu_freq)
22
23
# Base name of the sysfs file reporting the CPU temperature. The file is
# exported by the temperature monitor driver and lives in the subtree of the
# corresponding device. The file name is used to locate that subtree, so
# exactly one file with this name is expected to exist under /sys. The ext_
# prefix indicates that the reading comes from a sensor located next to the
# CPU. This facility may not be available on some platforms; the test would
# need to be updated to accommodate those.
#
# The 'standard' temperature reading available through
# /sys/class/hwmon/hwmon0/device/temperature does not represent the actual
# CPU temperature: when the CPU load changes, the 'standard' reading changes
# much more slowly, and to a lesser extent, than the value in
# */ext_temperature.
EXT_TEMP_SENSOR_FILE = 'ext_temperature'

# Base name of the file holding the throttling temperature (clock throttling
# starts once the CPU temperature exceeds this value).
THROTTLE_EXT_LIMIT_FILE = 'throttle_ext_limit'

# Root directory of all sysfs information about the CPU(s).
CPU_INFO_ROOT = '/sys/devices/system/cpu'

# Template of the path to a per core cpufreq file; takes the core number and
# the base name of the file.
PER_CORE_FREQ_TEMPLATE = '/'.join((CPU_INFO_ROOT, 'cpu%d', 'cpufreq', '%s'))

# Common prefix (directory included) of the temporary files used by this
# test.
TMP_FILE_TEMPLATE = '/tmp/thermal_'

# Temperature rise expected to be caused by increased CPU activity.
DELTA = 3.0

# Name of the file controlling a core's clocking discipline.
GOVERNOR = 'scaling_governor'

# Name of the file providing the space separated list of available clocking
# disciplines.
AVAILABLE_GOVERNORS = 'scaling_available_governors'
62
def clean_up(obj):
    '''
    Cleanup hook meant to be registered with the autotest engine.

    The hook is invoked with the test object once the test has run, no
    matter whether it completed successfully or threw an exception. All the
    actual work is delegated to the object's own cleanup() method, its
    return value is ignored.
    '''

    finish = obj.cleanup
    finish()
72
73
class power_Thermal(test.test):
    '''
    Check that CPU clock throttling starts when the CPU heats up.

    run_once() switches all cores to the 'performance' governor, sets the
    throttling threshold DELTA/2 above the current temperature, loads the
    CPU with worker threads and expects all cores to report throttling
    before the temperature has risen by DELTA degrees. cleanup() puts the
    throttling threshold and the governors back.
    '''
    version = 1


    def _cpu_heater(self):
        '''
        A function to execute some code to heat up the target.

        This function is run on a separate thread, all it does - opens a file
        for writing, writes it with 100K characters, closes and removes the
        file, it is running in a tight loop until the stop_all_workers flag
        turns True.

        Multiple threads are spawned to cause maximum CPU activity.
        '''

        (handle, fname) = tempfile.mkstemp(
            prefix=os.path.basename(TMP_FILE_TEMPLATE),
            dir=os.path.dirname(TMP_FILE_TEMPLATE))
        # Only the unique name is of interest, the file itself is recreated
        # on every pass of the loop below.
        os.close(handle)
        os.remove(fname)
        while not self.stop_all_workers:
            # The payload is deliberately rebuilt on each pass: generating
            # work is the whole point of this function.
            with open(fname, 'w') as f:
                f.write('x' * 100000)
            os.remove(fname)


    def _add_heater_thread(self):
        '''
        Add a thread to run another instance of _cpu_heater().

        The thread is recorded in self.worker_threads (so that cleanup() can
        join it) and is started as a daemon thread.
        '''

        thread_count = len(self.worker_threads)
        logging.info('adding thread number %d' % thread_count)
        new_thread = threading.Thread(target=self._cpu_heater)
        self.worker_threads.append(new_thread)
        new_thread.daemon = True
        new_thread.start()


    def _throttle_count(self):
        '''
        Return current throttling status of all cores.

        The returned integer value is the sum of all cores' throttling
        status, as read from each core's 'throttle' file. When the sum is
        equal to the number of cores - all cores are throttling.
        '''

        return sum(
            int(utils.read_file(PER_CORE_FREQ_TEMPLATE % (cpu, 'throttle')))
            for cpu in range(self.pl_desc.num_cores))


    def _cpu_freq(self, cpu):
        '''Return current clock frequency of a CPU, integer in Kilohertz.'''

        return int(utils.read_file(
                PER_CORE_FREQ_TEMPLATE % (cpu, 'cpuinfo_cur_freq')))


    def _cpu_temp(self):
        '''Return current CPU temperature, a float value.'''

        return float(utils.read_file(
                os.path.join(self.temperature_data_path, EXT_TEMP_SENSOR_FILE)))


    def _throttle_limit(self):
        '''
        Return current CPU throttling temperature threshold.

        If CPU temperature exceeds this value, clock throttling is activated,
        causing CPU slowdown.

        Returns the limit as a float value.
        '''

        return float(utils.read_file(
                os.path.join(self.temperature_data_path,
                             THROTTLE_EXT_LIMIT_FILE)))


    def _set_throttle_limit(self, new_limit):
        '''
        Set current CPU throttling temperature threshold.

        The passed in float value is rounded to the nearest integer before
        it is written into the throttle limit file.
        '''

        utils.open_write_close(
            os.path.join(
                self.temperature_data_path, THROTTLE_EXT_LIMIT_FILE),
            '%d' % int(round(new_limit)))


    def _check_freq(self):
        '''
        Verify that all CPU clocks are in range for this target.

        raises TestError naming the first core whose current frequency is
               outside of [min_cpu_freq, max_cpu_freq].
        '''

        for cpu in range(self.pl_desc.num_cores):
            freq = self._cpu_freq(cpu)
            # Keep going after a good reading: every core has to be
            # examined, not just the first one.
            if not (self.pl_desc.min_cpu_freq <= freq <=
                    self.pl_desc.max_cpu_freq):
                raise error.TestError('Wrong cpu %d frequency reading %d' % (
                        cpu, freq))


    def _get_cpu_freq_raised(self):
        '''
        Bring all cores clock to max frequency.

        This function uses the scaling_governor mechanism to force the cores
        to run at maximum frequency, writing the string 'performance' into
        each core's governor file.

        The current value (if not 'performance') is preserved to be restored
        in the end of the test. The dictionary of preserved values is also
        stored in self.saved_governors as soon as it is created, so that
        cleanup() can restore the governors even if this function raises.

        Returns a dictionary where keys are the core numbers and values are
        the preserved governor setting.

        raises TestError in case 'performance' setting is not allowed on any
               of the cores, or the clock frequency does not reach max on any
               of the cores in 1 second.
        '''

        target = 'performance'
        rv = {}
        self.saved_governors = rv
        for cpu in range(self.pl_desc.num_cores):
            gov_file = PER_CORE_FREQ_TEMPLATE % (cpu, GOVERNOR)
            current_gov = utils.read_file(gov_file).strip()
            if current_gov == target:
                continue

            available_govs = utils.read_file(PER_CORE_FREQ_TEMPLATE % (
                    cpu, AVAILABLE_GOVERNORS)).split()
            if target not in available_govs:
                raise error.TestError('core %d does not allow setting %s'
                                      % (cpu, target))
            logging.info('changing core %d governor from %s to %s' % (
                    cpu, current_gov, target))
            utils.open_write_close(gov_file, target)
            rv[cpu] = current_gov

        # Wait for no more than 1 second: the clocks are examined right
        # away, then again after 0.5 and after 1 second.
        for attempt in range(3):
            if attempt:
                time.sleep(.5)
            if all(self._cpu_freq(cpu) == self.pl_desc.max_cpu_freq
                   for cpu in range(self.pl_desc.num_cores)):
                return rv

        freqs = ['%d' % self._cpu_freq(cpu)
                 for cpu in range(self.pl_desc.num_cores)]
        raise error.TestError('failed to speed up some CPU clocks: %s' %
                              ', '.join(freqs))


    def _get_cpu_temp_raised(self):
        '''
        Start more threads to increase CPU temperature.

        This function starts 10 threads and waits till either of the two
        events happen:

        - the throttling is activated (the threshold is expected to be set at
          DELTA/2 above the temperature when the test started). This is
          considered a success, the function returns.

        - the temperature rises DELTA degrees above the original temperature
          but throttling does not start. This is considered an overheating
          failure, a test error is raised.

        If the temperature does not reach the DELTA and throttling does not
        start in 30 seconds - a test error is also raised in this case.
        '''

        base_temp = self._cpu_temp()
        # Start 10 more cpu heater threads
        for _ in range(10):
            self._add_heater_thread()

        # Wait 30 seconds for the temp to rise DELTA degrees or throttling to
        # start
        for count in range(30):
            new_temp = self._cpu_temp()
            if new_temp - base_temp >= DELTA:
                # Both the temperature and the elapsed time have to be
                # supplied to match the two conversions in the message.
                raise error.TestError(
                    'Reached temperature of %2.1fC in %d'
                    ' seconds, no throttling.'
                    % (new_temp, count))
            if self._throttle_count() == self.pl_desc.num_cores:
                logging.info('full throttle after %d seconds' % count)
                return
            time.sleep(1)
        raise error.TestError(
            'failed to raise CPU temperature from %s (reached %s), '
            '%d cores throttled' % (
                str(base_temp), str(new_temp), self._throttle_count()))


    def _get_platform_descriptor(self):
        '''
        Fill out the platform descriptor to be used by the test.

        The number of cores is derived from the 'present' file in
        CPU_INFO_ROOT, which is expected to hold either a single core number
        ('0') or one range ('0-3'). Frequency limits are taken from core 0.

        Returns a PlatformDescriptor object.

        raises TestError if the contents of the 'present' file is in neither
               of the two supported forms.
        '''

        present = utils.read_file(os.path.join(CPU_INFO_ROOT, 'present'))
        bounds = present.strip().split('-')
        try:
            if len(bounds) > 2:
                raise ValueError(present)
            core_ids = [int(x) for x in bounds]
        except ValueError:
            raise error.TestError(
                "can't determine number of cores from %s" % present)
        # A single number yields a one element list, in which case both
        # ends of the range are the same core.
        (min_core, max_core) = (core_ids[0], core_ids[-1])

        min_freq = int(utils.read_file(
            PER_CORE_FREQ_TEMPLATE % (0, 'cpuinfo_min_freq')))
        max_freq = int(utils.read_file(
            PER_CORE_FREQ_TEMPLATE % (0, 'cpuinfo_max_freq')))

        return PlatformDescriptor(max_core - min_core + 1, max_freq, min_freq)


    def _prepare_test(self):
        '''
        Prepare test: check initial conditions and set variables.

        raises TestError if the temperature sensor file can not be uniquely
               located, a core clock is out of range, throttling is already
               active, or the CPU temperature is less than 4 * DELTA below
               the throttling threshold.
        '''

        # State examined by cleanup(); it is created before any of the
        # checks below gets a chance to raise.
        self.stop_all_workers = False

        # list to keep track of threads started to heat up CPU.
        self.worker_threads = []

        # Dictionary of saved cores' scaling governor settings.
        self.saved_governors = {}

        ext_temp_path = utils.system_output(
            'find /sys -name %s' % EXT_TEMP_SENSOR_FILE).splitlines()
        if len(ext_temp_path) != 1:
            raise error.TestError('found %d sensor files' % len(ext_temp_path))

        self.temperature_data_path = os.path.dirname(ext_temp_path[0])

        self.pl_desc = self._get_platform_descriptor()

        # Verify CPU frequency is in range.
        self._check_freq()

        # Make sure we are not yet throttling.
        if self._throttle_count():
            raise error.TestError('Throttling active before test started')

        # Remember throttling level setting before test started.
        self.preserved_throttle_limit = self._throttle_limit()

        # Read the temperature once, so that the reported value is the very
        # value the decision was based on.
        current_temp = self._cpu_temp()
        if self.preserved_throttle_limit - current_temp < 4 * DELTA:
            raise error.TestError('Target is too hot: %s C' % str(
                    current_temp))

        self.register_after_iteration_hook(clean_up)


    def run_once(self):
        '''
        Run the test: heat up the CPU and verify that throttling starts.

        After throttling was observed the original throttling threshold is
        restored and throttling is expected to stop within half a second.

        raises TestError if any of the steps fails.
        '''

        self._prepare_test()
        logging.info('starting temperature is %s' % str(self._cpu_temp()))
        logging.info('starting frequency is %s' % str(self._cpu_freq(0)))

        self.saved_governors = self._get_cpu_freq_raised()
        self._set_throttle_limit(self._cpu_temp() + DELTA/2)
        self._get_cpu_temp_raised()
        self._set_throttle_limit(self.preserved_throttle_limit)

        # Half a second after restoring the throttling limit is plenty for
        # throttling to stop.
        time.sleep(.5)
        if self._throttle_count():
            raise error.TestError('Throttling did not stop')

        logging.info('ending temperature is %s' % str(self._cpu_temp()))
        logging.info('ending frequency is %s' % str(self._cpu_freq(0)))


    def cleanup(self):
        '''
        Undo everything the test has changed on the target.

        Tells the worker threads to stop, restores the throttling threshold,
        joins the worker threads and restores the cores' scaling governors.
        The function tolerates being called after a preparation which failed
        half way through as well as being called more than once.

        raises TestError if some worker threads did not stop; this happens
               only after the target settings have been restored and the
               leftover temporary files removed.
        '''

        self.stop_all_workers = True

        # The attributes below could be missing if _prepare_test() raised
        # before getting to set them.
        preserved_limit = getattr(self, 'preserved_throttle_limit', None)
        if preserved_limit is not None:
            self._set_throttle_limit(preserved_limit)

        workers = getattr(self, 'worker_threads', [])
        logging.info('stopping %d thread(s)' % len(workers))
        runaway_threads = 0
        while workers:
            t = workers.pop()
            t.join(.5)
            if t.is_alive():
                runaway_threads += 1

        # Governors are restored before runaway threads are reported,
        # otherwise the cores would be left with the test's setting.
        saved_governors = getattr(self, 'saved_governors', None) or {}
        for (cpu, gov) in sorted(saved_governors.items()):
            gov_file = PER_CORE_FREQ_TEMPLATE % (cpu, GOVERNOR)
            logging.info('restoring core %d governor to %s' % (cpu, gov))
            utils.open_write_close(gov_file, gov)
        self.saved_governors = {}

        if runaway_threads:
            for f in glob.glob('%s*' % TMP_FILE_TEMPLATE):
                logging.info('removing %s' % f)
                try:
                    os.remove(f)
                except OSError as e:
                    # A thread which is still running could have removed
                    # the file after it was listed.
                    logging.warning('could not remove %s: %s' % (f, e))
            raise error.TestError(
                'Failed to join %d worker thread(s)' % runaway_threads)
371