1# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5from __future__ import print_function
6
7import json
8import logging
9import os
10import re
11import shlex
12import shutil
13
14from contextlib import contextmanager
15
16from autotest_lib.client.common_lib import error
17from autotest_lib.server import test
18from autotest_lib.server import utils
19from autotest_lib.server.cros import telemetry_runner
20from autotest_lib.server.cros.crosperf import device_setup_utils
21from autotest_lib.site_utils import test_runner_utils
22
# Timeout for any single helper command run on the host (e.g. scp).
WAIT_FOR_CMD_TIMEOUT_SECS = 60
# SSH options shared by all connections to the DUT: skip host-key checks,
# fail fast instead of prompting, and keep long-running sessions alive.
DUT_COMMON_SSH_OPTIONS = [
        '-o StrictHostKeyChecking=no', '-o UserKnownHostsFile=/dev/null',
        '-o BatchMode=yes', '-o ConnectTimeout=30',
        '-o ServerAliveInterval=900', '-o ServerAliveCountMax=3',
        '-o ConnectionAttempts=4', '-o Protocol=2'
]
# The same options flattened for direct interpolation into an scp command.
DUT_SCP_OPTIONS = ' '.join(DUT_COMMON_SSH_OPTIONS)

# Identity argument pointing at the shared testing RSA key.
RSA_KEY = '-i %s' % test_runner_utils.TEST_KEY_PATH
DUT_CHROME_RESULTS_DIR = '/usr/local/telemetry/src'

# File names for the various monitoring logs collected on the DUT.
TURBOSTAT_LOG = 'turbostat.log'
CPUSTATS_LOG = 'cpustats.log'
CPUINFO_LOG = 'cpuinfo.log'
TOP_LOG = 'top.log'
WAIT_TIME_LOG = 'wait_time.log'

# Result Statuses
SUCCESS_STATUS = 'SUCCESS'
WARNING_STATUS = 'WARNING'
FAILED_STATUS = 'FAILED'

# Regex for the RESULT output lines understood by chrome buildbot.
# Keep in sync with
# chromium/tools/build/scripts/slave/performance_log_processor.py.
RESULTS_REGEX = re.compile(r'(?P<IMPORTANT>\*)?RESULT '
                           r'(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
                           r'(?P<VALUE>[\{\[]?[-\d\., ]+[\}\]]?)('
                           r' ?(?P<UNITS>.+))?')
HISTOGRAM_REGEX = re.compile(r'(?P<IMPORTANT>\*)?HISTOGRAM '
                             r'(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
                             r'(?P<VALUE_JSON>{.*})(?P<UNITS>.+)?')
56
57
class telemetry_Crosperf(test.test):
    """
    Run one or more telemetry benchmarks under the crosperf script.

    """
    version = 1

    def scp_telemetry_results(self, client_ip, dut, file_path, host_dir,
                              ignore_status=False):
        """
        Copy telemetry results from dut.

        @param client_ip: The ip address of the DUT, optionally suffixed
                with ':<port>'. Only consulted when |dut| is None.
        @param dut: The autotest host object representing DUT.
        @param file_path: The file to copy from DUT.
        @param host_dir: The directory on host to put the file.
        @param ignore_status: If True, a non-zero scp exit status is
                returned to the caller instead of raising.

        @returns status code for scp command.

        """
        cmd = ['scp', DUT_SCP_OPTIONS, RSA_KEY, '-v']
        port = ''

        if dut:
            port = dut.port
            ip = dut.hostname
        else:
            # client_ip may carry an explicit ssh port as 'ip:port'.
            ip_and_port = client_ip.split(':')
            ip = ip_and_port[0]
            if len(ip_and_port) > 1:
                port = ip_and_port[1]

        if port:
            cmd.extend(['-P', str(port)])

        src = 'root@%s:%s' % (ip, file_path)
        cmd.extend([src, host_dir])
        command = ' '.join(cmd)

        logging.debug('Retrieving Results: %s', command)
        try:
            result = utils.run(
                    command,
                    timeout=WAIT_FOR_CMD_TIMEOUT_SECS,
                    ignore_status=ignore_status)
            exit_code = result.exit_status
        except Exception as e:
            logging.error('Failed to retrieve results: %s', e)
            raise

        if exit_code:
            logging.error('Command "%s" returned non-zero status: %d',
                          command, exit_code)
        return exit_code

    @contextmanager
    def no_background(self, *_args):
        """
        Background stub.

        Stands in for any run_*_in_background context manager when the
        corresponding collection is disabled; accepts and ignores the same
        positional arguments.

        """
        yield

    @contextmanager
    def run_in_background_with_log(self, cmd, dut, log_path):
        """
        Run |cmd| on |dut| in the background for the duration of the context.

        On exit, the background process group is killed and |log_path| is
        copied from the DUT into self.resultsdir.

        NOTE: No error handling, exception or error report if command fails
        to run in background. Command failure is silenced.

        @param cmd: Shell command to run on the DUT.
        @param dut: The autotest host object representing DUT.
        @param log_path: Path on the DUT where |cmd| writes its output.

        """
        logging.info('Running in background:\n%s', cmd)
        pid = dut.run_background(cmd)
        try:
            # TODO(denik http://crbug.com/966514): replace with more reliable
            # way to check run success/failure in background.
            # Wait some time before checking the process.
            check = dut.run('sleep 5; kill -0 %s' % pid, ignore_status=True)
            if check.exit_status != 0:
                # command wasn't started correctly
                logging.error(
                        "Background command wasn't started correctly.\n"
                        'Command:\n%s', cmd)
                # Clear pid so the cleanup below is skipped entirely.
                pid = ''
                yield
                return

            logging.info('Background process started successfully, pid %s',
                         pid)
            yield

        finally:
            if pid:
                # Stop background processes.
                logging.info('Killing background process, pid %s', pid)
                # Kill the process blindly. OK if it's already gone.
                # There is an issue when underlying child processes stay alive
                # while the parent master process is killed.
                # The solution is to kill the chain of processes via process
                # group id.
                dut.run('pgid=$(cat /proc/%s/stat | cut -d")" -f2 | '
                        'cut -d" " -f4) && kill -- -$pgid 2>/dev/null' % pid,
                        ignore_status=True)

                # Copy the results to results directory with silenced failure.
                scp_res = self.scp_telemetry_results(
                        '', dut, log_path, self.resultsdir, ignore_status=True)
                if scp_res:
                    logging.error(
                            'scp of cpuinfo logs failed '
                            'with error %d.', scp_res)

    def run_cpustats_in_background(self, dut, log_name):
        """
        Run command to collect CPU stats in background.

        Samples per-core cpufreq and thermal-zone readings every 30 seconds
        into /tmp/|log_name| on the DUT.

        @param dut: The autotest host object representing DUT.
        @param log_name: Basename of the log file to write on the DUT.

        @returns A context manager wrapping the background collection.

        """
        log_path = '/tmp/%s' % log_name
        cpu_stats_cmd = (
                'cpulog=%s; '
                'rm -f ${cpulog}; '
                # Stop after 720*0.5min=6hours if not killed at clean-up phase.
                'for i in {1..720} ; do '
                # Collect current CPU frequency on all cores and thermal data.
                ' paste -d" " '
                '  <(ls /sys/devices/system/cpu/cpu*/cpufreq/cpuinfo_cur_freq)'
                '  <(cat /sys/devices/system/cpu/cpu*/cpufreq/cpuinfo_cur_freq)'
                '  >> ${cpulog} || break; '  # exit loop if fails.
                ' paste -d" "'
                '   <(cat /sys/class/thermal/thermal_zone*/type)'
                '   <(cat /sys/class/thermal/thermal_zone*/temp)'
                # Filter in thermal data from only CPU-related sources.
                '  | grep -iE "soc|cpu|pkg|big|little" >> ${cpulog} || break; '
                # Repeat every 30 sec.
                ' sleep 30; '
                'done;') % log_path

        return self.run_in_background_with_log(cpu_stats_cmd, dut, log_path)

    def run_top_in_background(self, dut, log_name, interval_in_sec):
        """
        Run top in background.

        @param dut: The autotest host object representing DUT.
        @param log_name: Basename of the log file to write on the DUT.
        @param interval_in_sec: Sampling interval for top, in seconds.

        @returns A context manager wrapping the background collection.

        """
        log_path = '/tmp/%s' % log_name
        top_cmd = (
                # Run top in batch mode with specified interval and filter out
                # top system summary and processes not consuming %CPU.
                # Output of each iteration is separated by a blank line.
                'HOME=/usr/local COLUMNS=128 top -bi -d%.1f'
                ' | grep -E "^[ 0-9]|^$" > %s;') % (interval_in_sec, log_path)

        return self.run_in_background_with_log(top_cmd, dut, log_path)

    def run_turbostat_in_background(self, dut, log_name):
        """
        Run turbostat in background.

        @param dut: The autotest host object representing DUT.
        @param log_name: Basename of the log file to write on the DUT.

        @returns A context manager wrapping the background collection.

        """
        log_path = '/tmp/%s' % log_name
        turbostat_cmd = (
                'nohup turbostat --quiet --interval 10 '
                '--show=CPU,Bzy_MHz,Avg_MHz,TSC_MHz,Busy%%,IRQ,CoreTmp '
                '1> %s') % log_path

        return self.run_in_background_with_log(turbostat_cmd, dut, log_path)

    def run_cpuinfo(self, dut, log_name):
        """
        Collect CPU info of "dut" into "log_name" file.

        Dumps cpufreq settings per core, intel_pstate/online state when
        present, and thermal-zone readings. Runs synchronously (unlike the
        run_*_in_background helpers) and writes directly into
        self.resultsdir on the host. Command failure is logged, not raised.

        @param dut: The autotest host object representing DUT.
        @param log_name: Basename of the log file to write on the host.

        """
        cpuinfo_cmd = (
                'for cpunum in '
                "   $(awk '/^processor/ { print $NF ; }' /proc/cpuinfo ) ; do "
                ' for i in'
                ' `ls -d /sys/devices/system/cpu/cpu"${cpunum}"/cpufreq/'
                '{cpuinfo_cur_freq,scaling_*_freq,scaling_governor} '
                '     2>/dev/null` ; do '
                '  paste -d" " <(echo "${i}") <(cat "${i}"); '
                ' done;'
                'done;'
                'for cpudata in'
                '  /sys/devices/system/cpu/intel_pstate/no_turbo'
                '  /sys/devices/system/cpu/online; do '
                ' if [[ -e "${cpudata}" ]] ; then '
                '  paste <(echo "${cpudata}") <(cat "${cpudata}"); '
                ' fi; '
                'done; '
                # Adding thermal data to the log.
                'paste -d" "'
                '  <(cat /sys/class/thermal/thermal_zone*/type)'
                '  <(cat /sys/class/thermal/thermal_zone*/temp)')

        # Get CPUInfo at the end of the test.
        logging.info('Get cpuinfo: %s', cpuinfo_cmd)
        with open(os.path.join(self.resultsdir, log_name),
                  'w') as cpu_log_file:
            # Handle command failure gracefully.
            res = dut.run(
                    cpuinfo_cmd, stdout_tee=cpu_log_file, ignore_status=True)
            if res.exit_status:
                logging.error('Get cpuinfo command failed with %d.',
                              res.exit_status)

    def run_once(self, args, client_ip='', dut=None):
        """
        Run a single telemetry test.

        @param args: A dictionary of the arguments that were passed
                to this test.
        @param client_ip: The ip address of the DUT
        @param dut: The autotest host object representing DUT.

        @returns A TelemetryResult instance with the results of this execution.

        @raises RuntimeError: If the results file is missing, the artifacts
                path is not absolute, or results cannot be offloaded.
        @raises error.TestFail: If profiling was requested but no perf.data
                files were collected.
        """
        test_name = args.get('test', '')
        test_args = args.get('test_args', '')
        profiler_args = args.get('profiler_args', '')

        dut_config_str = args.get('dut_config', '{}')
        dut_config = json.loads(dut_config_str)
        # Setup device with dut_config arguments before running test
        if dut_config:
            wait_time = device_setup_utils.setup_device(dut, dut_config)
            # Wait time can be used to accumulate cooldown time in Crosperf.
            with open(os.path.join(self.resultsdir, WAIT_TIME_LOG), 'w') as f:
                f.write(str(wait_time))

        output_format = 'histograms'

        # For local runs, we set local=True and use local chrome source to run
        # tests; for lab runs, we use devserver instead.
        # By default to be True.
        local = args.get('local', 'true').lower() == 'true'

        # If run_local=true, telemetry benchmark will run on DUT, otherwise
        # run remotely from host.
        # By default to be False.
        # TODO(zhizhouy): It is better to change the field name from "run_local"
        # to "telemetry_on_dut" in crosperf experiment files for consistency.
        telemetry_on_dut = args.get('run_local', '').lower() == 'true'

        # Init TelemetryRunner.
        tr = telemetry_runner.TelemetryRunner(
                dut, local=local, telemetry_on_dut=telemetry_on_dut)

        # Run the test. And collect profile if needed.
        try:
            # If profiler_args specified, we want to add several more options
            # to the command so that run_benchmark will collect system wide
            # profiles. Initialize unconditionally so the later reference
            # can never raise NameError if the two guards diverge.
            profiler_opts = []
            if profiler_args:
                profiler_opts = [
                        '--interval-profiling-period=story_run',
                        '--interval-profiling-target=system_wide',
                        '--interval-profiler-options="%s"' % profiler_args
                ]

            top_interval = dut_config.get('top_interval', 0)
            turbostat = dut_config.get('turbostat')

            # Select real collectors or the no-op stub depending on whether
            # a DUT is available and the corresponding option is enabled.
            run_cpustats = self.run_cpustats_in_background if dut \
                else self.no_background
            run_turbostat = self.run_turbostat_in_background if (
                    dut and turbostat) else self.no_background
            run_top = self.run_top_in_background if (
                    dut and top_interval > 0) else self.no_background

            # FIXME(denik): replace with ExitStack.
            with run_cpustats(dut, CPUSTATS_LOG) as _cpu_cm, \
                run_turbostat(dut, TURBOSTAT_LOG) as _turbo_cm, \
                run_top(dut, TOP_LOG, top_interval) as _top_cm:

                arguments = []
                if test_args:
                    arguments.extend(shlex.split(test_args))
                if profiler_args:
                    arguments.extend(profiler_opts)
                logging.debug('Telemetry Arguments: %s', arguments)
                perf_value_writer = self
                # Artifacts (e.g. perf.data) are only needed when profiling.
                artifacts = bool(profiler_args)
                result = tr.run_telemetry_benchmark(
                        test_name,
                        perf_value_writer,
                        *arguments,
                        ex_output_format=output_format,
                        results_dir=self.resultsdir,
                        no_verbose=True,
                        artifacts=artifacts)
                logging.info('Telemetry completed with exit status: %s.',
                             result.status)
                logging.info('output: %s\n', result.output)

        except (error.TestFail, error.TestWarn):
            logging.debug(
                    'Test did not succeed while executing telemetry test.')
            raise
        except:
            logging.debug('Unexpected failure on telemetry_Crosperf.')
            raise
        finally:
            # Always snapshot final CPU state, even on failure.
            if dut:
                self.run_cpuinfo(dut, CPUINFO_LOG)

        # Checking whether result file exists.
        filepath = os.path.join(self.resultsdir, 'histograms.json')
        if not os.path.exists(filepath):
            raise RuntimeError('Missing results file: %s' % filepath)

        # Copy the perf data file into the test_that profiling directory,
        # if necessary. It always comes from DUT.
        if profiler_args:
            filepath = os.path.join(self.resultsdir, 'artifacts')
            if not os.path.isabs(filepath):
                raise RuntimeError('Expected absolute path of '
                                   'artifacts: %s' % filepath)
            perf_exist = False
            for root, _dirs, files in os.walk(filepath):
                for f in files:
                    if f.endswith('.perf.data'):
                        perf_exist = True
                        src_file = os.path.join(root, f)
                        # results-cache.py in crosperf supports multiple
                        # perf.data files, but only if they are named exactly
                        # so. Therefore, create a subdir for each perf.data
                        # file.
                        dst_dir = os.path.join(self.profdir, ''.join(
                                f.split('.')[:-2]))
                        os.makedirs(dst_dir)
                        dst_file = os.path.join(dst_dir, 'perf.data')
                        shutil.copyfile(src_file, dst_file)
            if not perf_exist:
                raise error.TestFail('Error: No profiles collected, test may '
                                     'not run correctly.')

        # In skylab run, environment variable SYNCHRONOUS_OFFLOAD_DIR is set to
        # be a directory; In test_that run, it will not be set.
        synchronous_dir = os.getenv('SYNCHRONOUS_OFFLOAD_DIR', '')
        # For skylab run, push logs and json results to the synchronous offload
        # directory for instant access for report.
        if synchronous_dir and os.path.isdir(synchronous_dir):
            try:
                dst = os.path.join(synchronous_dir, 'results')
                shutil.copytree(self.resultsdir, dst)
            # Narrow catch: copytree raises shutil.Error or OS errors.
            # Preserve the underlying cause in the raised message instead
            # of silently discarding it.
            except (shutil.Error, OSError, IOError) as e:
                raise RuntimeError(
                    'Failed to copy results in %s to synchronous offload'
                    ' directory %s: %s' % (self.resultsdir, synchronous_dir,
                                           e))

        return result
410