# Copyright 2015 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# pylint: disable=module-missing-docstring
# pylint: disable=docstring-section-name

import csv
import glob
import httplib
import json
import logging
import os
import re
import shutil
import time
import urllib
import urllib2

from autotest_lib.client.bin import site_utils
from autotest_lib.client.bin import test
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.cros import constants

# TODO(scunningham): Return to 72000 (20 hrs) after server-side stabilizes.
TEST_DURATION = 10800  # Duration of test (3 hrs) in seconds.
SAMPLE_INTERVAL = 60  # Length of measurement samples in seconds.
METRIC_INTERVAL = 3600  # Length between metric calculations in seconds.
STABILIZATION_DURATION = 60  # Time for test stabilization in seconds.
TMP_DIRECTORY = '/tmp/'
EXIT_FLAG_FILE = TMP_DIRECTORY + 'longevity_terminate'
PERF_FILE_NAME_PREFIX = 'perf'
OLD_FILE_AGE = 14400  # Age of old files to be deleted, in minutes (10 days).
# The manifest.json file for a Chrome Extension contains the app name, id,
# version, and other app info. It is accessible by the OS only when the app
# is running, and thus its cryptohome directory is mounted. Only one Kiosk
# app can be running at a time.
MANIFEST_PATTERN = '/home/.shadow/*/mount/user/Extensions/%s/*/manifest.json'
VERSION_PATTERN = r'^(\d+)\.(\d+)\.(\d+)\.(\d+)$'
DASHBOARD_UPLOAD_URL = 'https://chromeperf.appspot.com/add_point'


class PerfUploadingError(Exception):
    """Raised when uploading perf data to the dashboard fails."""
    pass


class longevity_Tracker(test.test):
    """Monitor device and App stability over long periods of time."""

    version = 1

    def initialize(self):
        self.temp_dir = os.path.split(self.tmpdir)[0]

    def _get_cpu_usage(self):
        """Compute percent CPU in active use over the sample interval.

        Note: This method introduces a sleep period into the test, equal to
        90% of the sample interval.

        @returns float of percent active use of CPU.
        """
        # Time between measurements is ~90% of the sample interval.
        measurement_time_delta = SAMPLE_INTERVAL * 0.90
        cpu_usage_start = site_utils.get_cpu_usage()
        time.sleep(measurement_time_delta)
        cpu_usage_end = site_utils.get_cpu_usage()
        return site_utils.compute_active_cpu_time(cpu_usage_start,
                                                  cpu_usage_end) * 100

    def _get_mem_usage(self):
        """Compute percent memory in active use.

        @returns float of percent memory in use.
        """
        total_memory = site_utils.get_mem_total()
        free_memory = site_utils.get_mem_free()
        return ((total_memory - free_memory) / total_memory) * 100

    def _get_max_temperature(self):
        """Get temperature of hottest sensor in Celsius.

        @returns float of temperature of hottest sensor.
        """
        temperature = utils.get_current_temperature_max()
        if not temperature:
            temperature = 0
        return temperature

    def _get_hwid(self):
        """Get hwid of test device, e.g., 'WOLF C4A-B2B-A47'.

        @returns string of hwid (Hardware ID) of device under test.
        """
        with os.popen('crossystem hwid 2>/dev/null', 'r') as hwid_proc:
            hwid = hwid_proc.read()
        if not hwid:
            hwid = 'undefined'
        return hwid

    def elapsed_time(self, mark_time):
        """Get time elapsed since |mark_time|.

        @param mark_time: point in time from which elapsed time is measured.
        @returns time elapsed since the marked time.
        """
        return time.time() - mark_time

    def modulo_time(self, timer, interval):
        """Get time elapsed on |timer| for the |interval| modulus.

        Value returned is used to adjust the timer so that it is synchronized
        with the current interval.

        @param timer: time on timer, in seconds.
        @param interval: period of time in seconds.
        @returns time elapsed from the start of the current interval.
        """
        return timer % int(interval)

    def syncup_time(self, timer, interval):
        """Get time remaining on |timer| for the |interval| modulus.

        Value returned is used to induce sleep just long enough to put the
        process back in sync with the timer.

        @param timer: time on timer, in seconds.
        @param interval: period of time in seconds.
        @returns time remaining until the end of the current interval.
        """
        return interval - (timer % int(interval))

    def _record_perf_measurements(self, perf_values, perf_writer):
        """Record attribute performance measurements, and write to file.

        @param perf_values: dict of attribute performance values.
        @param perf_writer: csv writer for the perf timestamped file.
        """
        # Get performance measurements.
        cpu_usage = '%.3f' % self._get_cpu_usage()
        mem_usage = '%.3f' % self._get_mem_usage()
        max_temp = '%.3f' % self._get_max_temperature()

        # Append measurements to attribute lists in perf values dictionary.
        perf_values['cpu'].append(cpu_usage)
        perf_values['mem'].append(mem_usage)
        perf_values['temp'].append(max_temp)

        # Write performance measurements to perf timestamped file.
        time_stamp = time.strftime('%Y/%m/%d %H:%M:%S')
        perf_writer.writerow([time_stamp, cpu_usage, mem_usage, max_temp])
        logging.info('Time: %s, CPU: %s, Mem: %s, Temp: %s',
                     time_stamp, cpu_usage, mem_usage, max_temp)

    def _record_90th_metrics(self, perf_values, perf_metrics):
        """Record 90th percentile metric of attribute performance values.

        @param perf_values: dict of attribute performance values.
        @param perf_metrics: dict of attribute 90th percentile metrics.
        """
        # Calculate the 90th percentile for each attribute. Sort numerically,
        # since the measurement values are stored as formatted strings.
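        # Illustrative example: an hour-long metric interval collects about
        # 60 one-minute samples, so index (60 * 9) // 10 = 54 of the sorted
        # list is the 90th-percentile sample for that hour.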
        cpu_values = sorted(perf_values['cpu'], key=float)
        mem_values = sorted(perf_values['mem'], key=float)
        temp_values = sorted(perf_values['temp'], key=float)
        cpu_metric = cpu_values[(len(cpu_values) * 9) // 10]
        mem_metric = mem_values[(len(mem_values) * 9) // 10]
        temp_metric = temp_values[(len(temp_values) * 9) // 10]
        logging.info('== Performance values: %s', perf_values)
        logging.info('== 90th percentile: cpu: %s, mem: %s, temp: %s',
                     cpu_metric, mem_metric, temp_metric)

        # Append 90th percentile to each attribute performance metric.
        perf_metrics['cpu'].append(cpu_metric)
        perf_metrics['mem'].append(mem_metric)
        perf_metrics['temp'].append(temp_metric)

    def _get_median_metrics(self, metrics):
        """Returns median of each attribute performance metric.

        If no metric values were recorded, return 0 for each metric.

        @param metrics: dict of attribute performance metric lists.
        @returns dict of attribute performance metric medians.
        """
        if len(metrics['cpu']):
            # Sort numerically; metric values are stored as strings.
            cpu_values = sorted(metrics['cpu'], key=float)
            mem_values = sorted(metrics['mem'], key=float)
            temp_values = sorted(metrics['temp'], key=float)
            cpu_metric = cpu_values[len(cpu_values) // 2]
            mem_metric = mem_values[len(mem_values) // 2]
            temp_metric = temp_values[len(temp_values) // 2]
        else:
            cpu_metric = 0
            mem_metric = 0
            temp_metric = 0
        logging.info('== Median: cpu: %s, mem: %s, temp: %s',
                     cpu_metric, mem_metric, temp_metric)
        return {'cpu': cpu_metric, 'mem': mem_metric, 'temp': temp_metric}

    def _append_to_aggregated_file(self, ts_file, ag_file):
        """Append contents of perf timestamp file to perf aggregated file.

        @param ts_file: file handle for performance timestamped file.
        @param ag_file: file handle for performance aggregated file.
        """
        next(ts_file)  # Skip first line (the header) of timestamped file.
        for line in ts_file:
            ag_file.write(line)

    def _copy_aggregated_to_resultsdir(self, aggregated_fpath):
        """Copy perf aggregated file to results dir for AutoTest results.

        Note: The AutoTest results default directory is located at /usr/local/
        autotest/results/default/longevity_Tracker/results

        @param aggregated_fpath: file path to aggregated performance values.
        """
        results_fpath = os.path.join(self.resultsdir, 'perf.csv')
        shutil.copy(aggregated_fpath, results_fpath)
        logging.info('Copied %s to %s', aggregated_fpath, results_fpath)

    def _write_perf_keyvals(self, perf_results):
        """Write perf results to keyval file for AutoTest results.

        @param perf_results: dict of attribute performance metrics.
        """
        perf_keyval = {}
        perf_keyval['cpu_usage'] = perf_results['cpu']
        perf_keyval['memory_usage'] = perf_results['mem']
        perf_keyval['temperature'] = perf_results['temp']
        self.write_perf_keyval(perf_keyval)

    def _write_perf_results(self, perf_results):
        """Write perf results to results-chart.json file for Perf Dashboard.

        @param perf_results: dict of attribute performance metrics.
        """
        cpu_metric = perf_results['cpu']
        mem_metric = perf_results['mem']
        ec_metric = perf_results['temp']
        self.output_perf_value(description='cpu_usage', value=cpu_metric,
                               units='%', higher_is_better=False)
        self.output_perf_value(description='mem_usage', value=mem_metric,
                               units='%', higher_is_better=False)
        self.output_perf_value(description='max_temp', value=ec_metric,
                               units='Celsius', higher_is_better=False)

    def _read_perf_results(self):
        """Read perf results from results-chart.json file for Perf Dashboard.

        @returns dict of perf results, formatted as JSON chart data.
        """
        results_file = os.path.join(self.resultsdir, 'results-chart.json')
        with open(results_file, 'r') as fp:
            contents = fp.read()
            chart_data = json.loads(contents)
        return chart_data

    def _get_point_id(self, cros_version, epoch_minutes):
        """Compute point ID from ChromeOS version number and epoch minutes.

        @param cros_version: String of ChromeOS version number.
        @param epoch_minutes: String of minutes since 1970.

        @return unique integer ID computed from given version and epoch.
        """
        # Number of digits from each part of the Chrome OS version string.
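        # Worked example (hypothetical values): with widths [0, 4, 3, 2],
        # version '0.7520.59.0' yields the digit string '752005900'; appended
        # to the last 8 digits of epoch_minutes (e.g. '24318574'), the point
        # ID becomes 24318574752005900.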
        cros_version_col_widths = [0, 4, 3, 2]

        def get_digits(version_num, column_widths):
            if re.match(VERSION_PATTERN, version_num):
                computed_string = ''
                version_parts = version_num.split('.')
                for i, version_part in enumerate(version_parts):
                    if column_widths[i]:
                        computed_string += version_part.zfill(column_widths[i])
                return computed_string
            else:
                return None

        cros_digits = get_digits(cros_version, cros_version_col_widths)
        epoch_digits = epoch_minutes[-8:]
        if not cros_digits:
            return None
        return int(epoch_digits + cros_digits)

    def _get_kiosk_app_info(self, app_id):
        """Get kiosk app name and version from manifest.json file.

        Get the Kiosk App name and version strings from the manifest file of
        the specified |app_id| Extension in the currently running session. If
        |app_id| is empty or None, then return 'none' for the kiosk app info.

        Raise an error if no manifest is found (i.e., |app_id| is not
        running), or if multiple manifest files are found (i.e., |app_id| is
        running, but the |app_id| dir contains multiple versions or manifest
        files).

        @param app_id: string kiosk application identification.
        @returns dict of Kiosk name and version number strings.
        @raises error.TestError if a single manifest file is not found.
        """
        kiosk_app_info = {'name': 'none', 'version': 'none'}
        if not app_id:
            return kiosk_app_info

        # Get path to manifest file of the running Kiosk app_id.
        app_manifest_pattern = (MANIFEST_PATTERN % app_id)
        logging.info('app_manifest_pattern: %s', app_manifest_pattern)
        file_paths = glob.glob(app_manifest_pattern)
        # Raise error if current session has no Kiosk Apps running.
        if len(file_paths) == 0:
            raise error.TestError('Kiosk App ID=%s is not running.' % app_id)
        # Raise error if running Kiosk App has multiple manifest files.
        if len(file_paths) > 1:
            raise error.TestError('Kiosk App ID=%s has multiple manifest '
                                  'files.' % app_id)
        with open(file_paths[0], 'r') as manifest_file:
            kiosk_manifest = manifest_file.read()
        manifest_json = json.loads(kiosk_manifest)
        # If manifest is missing name or version key, set to 'undefined'.
        kiosk_app_info['name'] = manifest_json.get('name', 'undefined')
        kiosk_app_info['version'] = manifest_json.get('version', 'undefined')
        return kiosk_app_info

    def _format_data_for_upload(self, chart_data):
        """Collect chart data into an uploadable data JSON object.

        @param chart_data: performance results formatted as chart data.
        @returns dict with the JSON-encoded dashboard entry under 'data'.
        """
        perf_values = {
            'format_version': '1.0',
            'benchmark_name': self.test_suite_name,
            'charts': chart_data,
        }

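        # _send_to_dashboard() posts this entry as a single form-encoded
        # 'data' field containing the JSON-serialized dictionary.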
        dash_entry = {
            'master': 'ChromeOS_Enterprise',
            'bot': 'cros-%s' % self.board_name,
            'point_id': self.point_id,
            'versions': {
                'cros_version': self.chromeos_version,
                'chrome_version': self.chrome_version,
            },
            'supplemental': {
                'default_rev': 'r_cros_version',
                'hardware_identifier': 'a_' + self.hw_id,
                'kiosk_app_name': 'a_' + self.kiosk_app_name,
                'kiosk_app_version': 'r_' + self.kiosk_app_version
            },
            'chart_data': perf_values
        }
        return {'data': json.dumps(dash_entry)}

    def _send_to_dashboard(self, data_obj):
        """Send formatted perf data to the perf dashboard.

        @param data_obj: data object as returned by _format_data_for_upload().

        @raises PerfUploadingError if an exception was raised when uploading.
        """
        logging.debug('data_obj: %s', data_obj)
        encoded = urllib.urlencode(data_obj)
        req = urllib2.Request(DASHBOARD_UPLOAD_URL, encoded)
        try:
            urllib2.urlopen(req)
        except urllib2.HTTPError as e:
            raise PerfUploadingError('HTTPError: %d %s for JSON %s\n' %
                                     (e.code, e.msg, data_obj['data']))
        except urllib2.URLError as e:
            raise PerfUploadingError('URLError: %s for JSON %s\n' %
                                     (str(e.reason), data_obj['data']))
        except httplib.HTTPException:
            raise PerfUploadingError('HTTPException for JSON %s\n' %
                                     data_obj['data'])

    def _get_chrome_version(self):
        """Get the Chrome version number and milestone as strings.

        Invoke "chrome --version" to get the version number and milestone.

        @return A tuple (chrome_ver, milestone) where "chrome_ver" is the
            current Chrome version number as a string (in the form "W.X.Y.Z")
            and "milestone" is the first component of the version number
            (the "W" from "W.X.Y.Z"). If the version number cannot be parsed
            in the "W.X.Y.Z" format, the "chrome_ver" will be the full output
            of "chrome --version" and the milestone will be the empty string.
        """
        chrome_version = utils.system_output(constants.CHROME_VERSION_COMMAND,
                                             ignore_status=True)
        chrome_version = utils.parse_chrome_version(chrome_version)
        return chrome_version

    def _open_perf_file(self, file_path):
        """Open a perf file. Write header line if new. Return file object.

        If the file on |file_path| already exists, then open it for
        appending only. Otherwise open it for writing only.

        @param file_path: file path for perf file.
        @returns file object for the perf file.
        """
        # If file exists, open it for appending. Do not write header.
        if os.path.isfile(file_path):
            perf_file = open(file_path, 'a+')
        # Otherwise, create it for writing. Write header on first line.
        else:
            perf_file = open(file_path, 'w')  # Erase if existing file.
            perf_file.write('Time,CPU,Memory,Temperature (C)\r\n')
        return perf_file

    def _run_test_cycle(self):
        """Track performance of Chrome OS over a long period of time.

        This method collects performance measurements, and calculates metrics
        to upload to the performance dashboard. It creates two files to
        collect and store performance values and results: perf_<timestamp>.csv
        and perf_aggregated.csv.

        At the start, it creates a unique perf timestamped file in the test's
        temp_dir. As the cycle runs, it saves a time-stamped performance
        value after each sample interval. Periodically, it calculates
        the 90th percentile performance metrics from these values.

        The perf_<timestamp> files on the device will survive multiple runs
        of longevity_Tracker by the server-side test, and will also survive
        multiple runs of the server-side test. The cleanup method deletes
        them after 10 days, to prevent filling up the SSD.

        At the end, it opens the perf aggregated file in the test's temp_dir,
        and appends the contents of the perf timestamped file. It then
        copies the perf aggregated file to the results directory as perf.csv.
        This perf.csv file will be consumed by the AutoTest backend when the
        server-side test ends.

        Note that the perf_aggregated.csv file will grow larger with each run
        of longevity_Tracker on the device by the server-side test. However,
        the server-side test will delete the file at the end.

        This method also calculates 90th percentile and median metrics, and
        returns the median metrics. Median metrics will be pushed to the perf
        dashboard with a unique point_id.

        @returns dict of median performance metrics.
        """
        # Allow system to stabilize before starting to take measurements.
        test_start_time = time.time()
        time.sleep(STABILIZATION_DURATION)

        perf_values = {'cpu': [], 'mem': [], 'temp': []}
        perf_metrics = {'cpu': [], 'mem': [], 'temp': []}

        # Create perf_<timestamp> file and writer.
        timestamp_fname = (PERF_FILE_NAME_PREFIX +
                           time.strftime('_%Y-%m-%d_%H-%M') + '.csv')
        timestamp_fpath = os.path.join(self.temp_dir, timestamp_fname)
        timestamp_file = self._open_perf_file(timestamp_fpath)
        timestamp_writer = csv.writer(timestamp_file)

        # Align time of loop start with the sample interval.
        test_elapsed_time = self.elapsed_time(test_start_time)
        time.sleep(self.syncup_time(test_elapsed_time, SAMPLE_INTERVAL))
        test_elapsed_time = self.elapsed_time(test_start_time)

        metric_start_time = time.time()
        metric_prev_time = metric_start_time

        metric_elapsed_prev_time = self.elapsed_time(metric_prev_time)
        offset = self.modulo_time(metric_elapsed_prev_time, METRIC_INTERVAL)
        metric_timer = metric_elapsed_prev_time + offset
        while self.elapsed_time(test_start_time) <= TEST_DURATION:
            if os.path.isfile(EXIT_FLAG_FILE):
                logging.info('Exit flag file detected. Exiting test.')
                break
            self._record_perf_measurements(perf_values, timestamp_writer)

            # Periodically calculate and record 90th percentile metrics.
            metric_elapsed_prev_time = self.elapsed_time(metric_prev_time)
            metric_timer = metric_elapsed_prev_time + offset
            if metric_timer >= METRIC_INTERVAL:
                self._record_90th_metrics(perf_values, perf_metrics)
                perf_values = {'cpu': [], 'mem': [], 'temp': []}

                # Set previous time to current time.
                metric_prev_time = time.time()
                metric_elapsed_prev_time = self.elapsed_time(metric_prev_time)

                # Calculate offset based on the original start time.
                metric_elapsed_time = self.elapsed_time(metric_start_time)
                offset = self.modulo_time(metric_elapsed_time, METRIC_INTERVAL)

                # Set the timer to time elapsed plus offset to next interval.
                metric_timer = metric_elapsed_prev_time + offset

            # Sync the loop time to the sample interval.
            test_elapsed_time = self.elapsed_time(test_start_time)
            time.sleep(self.syncup_time(test_elapsed_time, SAMPLE_INTERVAL))

        # Close perf timestamp file.
        timestamp_file.close()

        # Open perf timestamp file to read, and aggregated file to append.
        timestamp_file = open(timestamp_fpath, 'r')
        aggregated_fname = (PERF_FILE_NAME_PREFIX + '_aggregated.csv')
        aggregated_fpath = os.path.join(self.temp_dir, aggregated_fname)
        aggregated_file = self._open_perf_file(aggregated_fpath)

        # Append contents of perf timestamp file to perf aggregated file.
        self._append_to_aggregated_file(timestamp_file, aggregated_file)
        timestamp_file.close()
        aggregated_file.close()

        # Copy perf aggregated file to test results directory.
        self._copy_aggregated_to_resultsdir(aggregated_fpath)

        # Return median of each attribute performance metric.
        return self._get_median_metrics(perf_metrics)

    def run_once(self, kiosk_app_attributes=None):
        """Run the longevity tracking test.

        @param kiosk_app_attributes: Kiosk app name, app ID, and extension
            page, separated by colons.
        """
        app_name = app_id = ext_page = None
        self.subtest_name = None
        if kiosk_app_attributes:
            app_name, app_id, ext_page = (
                kiosk_app_attributes.rstrip().split(':'))
            self.subtest_name = app_name
        self.board_name = utils.get_board()
        self.hw_id = self._get_hwid()
        self.chrome_version = self._get_chrome_version()[0]
        self.chromeos_version = '0.' + utils.get_chromeos_release_version()
        self.epoch_minutes = str(int(time.time() / 60))  # Minutes since 1970.
        self.point_id = self._get_point_id(self.chromeos_version,
                                           self.epoch_minutes)

        kiosk_info = self._get_kiosk_app_info(app_id)
        self.kiosk_app_name = kiosk_info['name']
        self.kiosk_app_version = kiosk_info['version']
        self.test_suite_name = self.tagged_testname
        if self.subtest_name:
            self.test_suite_name += '.' + self.subtest_name

        # Delete exit flag file at start of test run.
        if os.path.isfile(EXIT_FLAG_FILE):
            os.remove(EXIT_FLAG_FILE)

        # Run a single test cycle.
        self.perf_results = {'cpu': '0', 'mem': '0', 'temp': '0'}
        self.perf_results = self._run_test_cycle()

        # Write results for AutoTest to pick up at end of test.
        self._write_perf_keyvals(self.perf_results)
        self._write_perf_results(self.perf_results)

        # Post perf results directly to performance dashboard. You may view
        # uploaded data at https://chromeperf.appspot.com/new_points,
        # with test path pattern=ChromeOS_Enterprise/cros-*/longevity*/*
        chart_data = self._read_perf_results()
        data_obj = self._format_data_for_upload(chart_data)
        self._send_to_dashboard(data_obj)

    def cleanup(self):
        """Delete aged perf data files and the exit flag file."""
        cmd = ('find %s -name "%s*" -type f -mmin +%s -delete' %
               (self.temp_dir, PERF_FILE_NAME_PREFIX, OLD_FILE_AGE))
        os.system(cmd)
        if os.path.isfile(EXIT_FLAG_FILE):
            os.remove(EXIT_FLAG_FILE)