#!/usr/bin/python2 # Copyright 2017 The Chromium OS Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """ This is a utility to build a summary of the given directory. and save to a json file. usage: utils.py [-h] [-p PATH] [-m MAX_SIZE_KB] optional arguments: -p PATH Path to build directory summary. -m MAX_SIZE_KB Maximum result size in KB. Set to 0 to disable result throttling. The content of the json file looks like: {'default': {'/D': [{'control': {'/S': 734}}, {'debug': {'/D': [{'client.0.DEBUG': {'/S': 5698}}, {'client.0.ERROR': {'/S': 254}}, {'client.0.INFO': {'/S': 1020}}, {'client.0.WARNING': {'/S': 242}}], '/S': 7214}} ], '/S': 7948 } } """ from __future__ import division from __future__ import print_function import argparse import copy import fnmatch import glob import json import logging import os import random from six.moves import range import sys import time import traceback try: from autotest_lib.client.bin.result_tools import dedupe_file_throttler from autotest_lib.client.bin.result_tools import delete_file_throttler from autotest_lib.client.bin.result_tools import result_info from autotest_lib.client.bin.result_tools import shrink_file_throttler from autotest_lib.client.bin.result_tools import throttler_lib from autotest_lib.client.bin.result_tools import utils_lib from autotest_lib.client.bin.result_tools import zip_file_throttler except ImportError: import dedupe_file_throttler import delete_file_throttler import result_info import shrink_file_throttler import throttler_lib import utils_lib import zip_file_throttler # Do NOT import autotest_lib modules here. This module can be executed without # dependency on other autotest modules. This is to keep the logic of result # trimming on the server side, instead of depending on the autotest client # module. 
# Name format of a per-run summary file; `%d` is filled with a timestamp.
DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json'
# Glob pattern matching every directory summary file.
SUMMARY_FILE_PATTERN = 'dir_summary_*.json'
MERGED_SUMMARY_FILENAME = 'dir_summary_final.json'

# Minimum disk space should be available after saving the summary file.
MIN_FREE_DISK_BYTES = 10 * 1024 * 1024

# Autotest uses some state files to track process running state. The files are
# deleted from test results. Therefore, these files can be ignored.
FILES_TO_IGNORE = set([
    'control.autoserv.state'
])

# Smallest file size to shrink to.
MIN_FILE_SIZE_LIMIT_BYTE = 10 * 1024


def get_unique_dir_summary_file(path):
    """Get a unique file path to save the directory summary json string.

    The name is built from the current time. If a file with that name already
    exists in `path`, a numeric suffix (`_1`, `_2`, ...) is inserted before the
    extension until an unused name is found. The file itself is not created.

    @param path: The directory path to save the summary file to.

    @return: A path inside `path` that does not exist yet.
    """
    summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time()
    file_name = os.path.join(path, summary_file)
    name, ext = os.path.splitext(summary_file)
    # Make sure the summary file name is unique.
    count = 1
    while os.path.exists(file_name):
        file_name = os.path.join(path, '%s_%s%s' % (name, count, ext))
        count += 1
    return file_name


def _preprocess_result_dir_path(path):
    """Verify the result directory path is valid and make sure it ends with `/`.

    @param path: A path to the result directory.

    @return: A verified and processed path to the result directory.

    @raise IOError: If the path doesn't exist.
    @raise ValueError: If the path is not a directory.
    """
    if not os.path.exists(path):
        raise IOError('Path %s does not exist.' % path)

    if not os.path.isdir(path):
        raise ValueError('The given path %s is a file. It must be a '
                         'directory.' % path)

    # Make sure the path ends with `/` so the root key of summary json is always
    # utils_lib.ROOT_DIR ('')
    if not path.endswith(os.sep):
        path = path + os.sep

    return path


def _delete_missing_entries(summary_old, summary_new):
    """Delete files/directories only exists in old summary.

    When the new summary is final, i.e., it's built from the final result
    directory, files or directories missing are considered to be deleted and
    trimmed to size 0.

    The old summary is modified in place; the new summary is only read.

    @param summary_old: Old directory summary.
    @param summary_new: New directory summary.
    """
    new_files = summary_new.get_file_names()
    old_files = summary_old.get_file_names()
    for name in old_files:
        old_file = summary_old.get_file(name)
        if name not in new_files:
            if old_file.is_dir:
                # Trim sub-directories.
                with old_file.disable_updating_parent_size_info():
                    _delete_missing_entries(old_file, result_info.EMPTY)
                old_file.update_sizes()
            elif name in FILES_TO_IGNORE:
                # Remove the file from the summary as it can be ignored.
                summary_old.remove_file(name)
            else:
                with old_file.disable_updating_parent_size_info():
                    # Before setting the trimmed size to 0, update the collected
                    # size if it's not set yet.
                    if not old_file.is_collected_size_recorded:
                        old_file.collected_size = old_file.trimmed_size
                    old_file.trimmed_size = 0
        elif old_file.is_dir:
            # If `name` is a directory in the old summary, but a file in the new
            # summary, delete the entry in the old summary.
            new_file = summary_new.get_file(name)
            if not new_file.is_dir:
                new_file = result_info.EMPTY
            _delete_missing_entries(old_file, new_file)


def _relocate_summary(result_dir, summary_file, summary):
    """Update the given summary with the path relative to the result_dir.

    @param result_dir: Path to the result directory.
    @param summary_file: Path to the summary file.
    @param summary: A directory summary inside the given result_dir or its
            sub-directory.

    @return: An updated summary with the path relative to the result_dir. If
            the summary file sits directly in result_dir, `summary` itself is
            returned unchanged.
    """
    summary_dir = os.path.dirname(summary_file)
    result_root = result_dir.rstrip(os.sep)
    if summary_dir.startswith(result_root):
        # Strip only the leading result directory. A plain str.replace would
        # also remove later occurrences of the same text in a nested path.
        sub_path = summary_dir[len(result_root):]
    else:
        # The summary is not addressed through result_dir; keep the previous
        # substring-removal behavior for such paths.
        sub_path = summary_dir.replace(result_root, '')
    if sub_path == '':
        return summary

    folders = sub_path.split(os.sep)

    # The first folder is always '' because of the leading `/` in sub_path.
    parent = result_info.ResultInfo(
            result_dir, utils_lib.ROOT_DIR, parent_result_info=None)
    root = parent

    # That makes sure root has only one folder of utils_lib.ROOT_DIR.
    last_index = len(folders) - 1
    for i in range(1, len(folders)):
        child = result_info.ResultInfo(
                parent.path, folders[i], parent_result_info=parent)
        if i == last_index:
            # Add files in summary to child.
            for info in summary.files:
                child.files.append(info)

        parent.files.append(child)
        parent = child

    parent.update_sizes()
    return root


def merge_summaries(path):
    """Merge all directory summaries in the given path.

    This function calculates the total size of result files being collected for
    the test device and the files generated on the drone. It also returns merged
    directory summary.

    @param path: A path to search for directory summaries.

    @return a tuple of (client_collected_bytes, merged_summary, files):
            client_collected_bytes: The total size of results collected from
                the DUT. The number can be larger than the total file size of
                the given path, as files can be overwritten or removed.
            merged_summary: The merged directory summary of the given path.
            files: All summary files in the given path, including
                sub-directories.

    @raise IOError: If the path doesn't exist.
    @raise ValueError: If the path is not a directory.
    """
    path = _preprocess_result_dir_path(path)
    # Find all directory summary files and sort them by their modification
    # time, oldest first.
    summary_files = []
    for root, _, filenames in os.walk(path):
        for filename in fnmatch.filter(filenames, SUMMARY_FILE_PATTERN):
            summary_files.append(os.path.join(root, filename))
    summary_files = sorted(summary_files, key=os.path.getmtime)

    all_summaries = []
    for summary_file in summary_files:
        try:
            summary = result_info.load_summary_json_file(summary_file)
            summary = _relocate_summary(path, summary_file, summary)
            all_summaries.append(summary)
        except (IOError, ValueError) as e:
            # A summary that cannot be loaded is logged and left out of the
            # merge; it is still reported in the returned file list.
            utils_lib.LOG('Failed to load summary file %s Error: %s' %
                          (summary_file, e))

    # Merge all summaries.
    merged_summary = all_summaries[0] if len(all_summaries) > 0 else None
    for summary in all_summaries[1:]:
        merged_summary.merge(summary)
    # After all summaries from the test device (client side) are merged, we can
    # get the total size of result files being transfered from the test device.
    # If there is no directory summary collected, default client_collected_bytes
    # to 0.
    client_collected_bytes = 0
    if merged_summary:
        client_collected_bytes = merged_summary.collected_size

    # Get the summary of current directory
    last_summary = result_info.ResultInfo.build_from_path(path)

    if merged_summary:
        merged_summary.merge(last_summary, is_final=True)
        _delete_missing_entries(merged_summary, last_summary)
    else:
        merged_summary = last_summary

    return client_collected_bytes, merged_summary, summary_files


def _throttle_results(summary, max_result_size_KB):
    """Throttle the test results by limiting to the given maximum size.

    Throttlers are applied one after another and the function returns as soon
    as throttler_lib.check_throttle_limit reports the summary is within the
    limit. A throttler that raises is logged and skipped.

    @param summary: A ResultInfo object containing result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
        utils_lib.LOG(
                'Result size is %s, which is less than %d KB. No need to '
                'throttle.' %
                (utils_lib.get_size_string(summary.trimmed_size),
                 max_result_size_KB))
        return

    args = {'summary': summary,
            'max_result_size_KB': max_result_size_KB}
    args_skip_autotest_log = copy.copy(args)
    args_skip_autotest_log['skip_autotest_log'] = True
    # Apply the throttlers in following order.
    throttlers = [
            (shrink_file_throttler, copy.copy(args_skip_autotest_log)),
            (zip_file_throttler, copy.copy(args_skip_autotest_log)),
            (shrink_file_throttler, copy.copy(args)),
            (dedupe_file_throttler, copy.copy(args)),
            (zip_file_throttler, copy.copy(args)),
            ]

    # Add another zip_file_throttler to compress the files being shrunk.
    # The threshold is set to half of the DEFAULT_FILE_SIZE_LIMIT_BYTE of
    # shrink_file_throttler.
    new_args = copy.copy(args)
    new_args['file_size_threshold_byte'] = 50 * 1024
    throttlers.append((zip_file_throttler, new_args))

    # If the above throttlers still can't reduce the result size to be under
    # max_result_size_KB, try to delete files with various threshold, starting
    # at 5MB then lowering to 100KB.
    delete_file_thresholds = [5*1024*1024, 1*1024*1024, 100*1024]
    # Try to keep tgz files first.
    exclude_file_patterns = [r'.*\.tgz']
    for threshold in delete_file_thresholds:
        new_args = copy.copy(args)
        new_args.update({'file_size_threshold_byte': threshold,
                         'exclude_file_patterns': exclude_file_patterns})
        throttlers.append((delete_file_throttler, new_args))
    # Add one more delete_file_throttler to not skipping tgz files.
    new_args = copy.copy(args)
    new_args.update({'file_size_threshold_byte': delete_file_thresholds[-1]})
    throttlers.append((delete_file_throttler, new_args))

    # Run the throttlers in order until result size is under max_result_size_KB.
    # old_size is the size before any throttler ran, so the size messages below
    # report the cumulative change since throttling started.
    old_size = summary.trimmed_size
    for throttler, throttler_args in throttlers:
        try:
            # The summary object is left out of the log message.
            args_without_summary = copy.copy(throttler_args)
            del args_without_summary['summary']
            utils_lib.LOG('Applying throttler %s, args: %s' %
                          (throttler.__name__, args_without_summary))
            throttler.throttle(**throttler_args)
            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return
        except Exception:
            # Best effort: a failing throttler must not stop the remaining
            # ones. KeyboardInterrupt and SystemExit still propagate.
            utils_lib.LOG('Failed to apply throttler %s. Exception: %s' %
                          (throttler, traceback.format_exc()))
        finally:
            new_size = summary.trimmed_size
            if new_size == old_size:
                utils_lib.LOG('Result size was not changed: %s.' % old_size)
            else:
                utils_lib.LOG('Result size was reduced from %s to %s.' %
                              (utils_lib.get_size_string(old_size),
                               utils_lib.get_size_string(new_size)))


def _setup_logging():
    """Set up logging to direct logs to stdout.

    Any handlers already attached to the root logger are dropped.
    """
    # Direct logging to stdout
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(message)s')
    handler.setFormatter(formatter)
    logger.handlers = []
    logger.addHandler(handler)


def _parse_options():
    """Options for the main script.

    @return: An option object container arg values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', type=str, dest='path',
                        help='Path to build directory summary.')
    parser.add_argument('-m', type=int, dest='max_size_KB', default=0,
                        help='Maximum result size in KB. Set to 0 to disable '
                        'result throttling.')
    parser.add_argument('-d', action='store_true', dest='delete_summaries',
                        default=False,
                        help='-d to delete all result summary files in the '
                        'given path.')
    return parser.parse_args()


def execute(path, max_size_KB):
    """Execute the script with given arguments.

    Builds a summary of `path`, saves it as a new summary file and, when
    `max_size_KB` is positive, may throttle the results and overwrite the
    saved summary with the throttled one.

    @param path: Path to build directory summary.
    @param max_size_KB: Maximum result size in KB.

    @raise utils_lib.NotEnoughDiskError: If saving the summary file would
            leave less than MIN_FREE_DISK_BYTES of free disk space.
    """
    utils_lib.LOG('Running result_tools/utils on path: %s' % path)
    if max_size_KB > 0:
        utils_lib.LOG('Throttle result size to : %s' %
                      utils_lib.get_size_string(max_size_KB * 1024))

    # When `path` is not a directory the summary file goes next to it.
    result_dir = path
    if not os.path.isdir(result_dir):
        result_dir = os.path.dirname(result_dir)
    summary = result_info.ResultInfo.build_from_path(path)
    summary_json = json.dumps(summary)
    summary_file = get_unique_dir_summary_file(result_dir)

    # Make sure there is enough free disk to write the file
    stat = os.statvfs(path)
    free_space = stat.f_frsize * stat.f_bavail
    if free_space - len(summary_json) < MIN_FREE_DISK_BYTES:
        raise utils_lib.NotEnoughDiskError(
                'Not enough disk space after saving the summary file. '
                'Available free disk: %s bytes. Summary file size: %s bytes.' %
                (free_space, len(summary_json)))

    with open(summary_file, 'w') as f:
        f.write(summary_json)
    utils_lib.LOG('Directory summary of %s is saved to file %s.' %
                  (path, summary_file))

    if max_size_KB > 0 and summary.trimmed_size > 0:
        old_size = summary.trimmed_size
        # Ratio of the allowed size to the current size. Throttling is skipped
        # when a random draw from [0, 1) falls below it, so results already
        # within the limit (ratio >= 1) are never throttled.
        throttle_probability = float(max_size_KB * 1024) / old_size
        if random.random() < throttle_probability:
            utils_lib.LOG(
                    'Skip throttling %s: size=%s, throttle_probability=%s' %
                    (path, old_size, throttle_probability))
        else:
            _throttle_results(summary, max_size_KB)
            if summary.trimmed_size < old_size:
                # Files are throttled, save the updated summary file.
                utils_lib.LOG('Overwrite the summary file: %s' % summary_file)
                result_info.save_summary(summary, summary_file)


def _delete_summaries(path):
    """Delete all directory summary files in the given directory.

    This is to cleanup the directory so no summary files are left behind to
    affect later tests. A file that cannot be deleted is logged and skipped.

    @param path: Path to cleanup directory summary.
    """
    # Only summary files directly under the `path` needs to be cleaned.
    summary_files = glob.glob(os.path.join(path, SUMMARY_FILE_PATTERN))
    for summary in summary_files:
        try:
            os.remove(summary)
        except (IOError, OSError) as e:
            # os.remove reports failures as OSError, which is a separate class
            # from IOError on Python 2.
            utils_lib.LOG('Failed to delete summary: %s. Error: %s' %
                          (summary, e))


def main():
    """main script.

    With -d the summary files under the given path are deleted; otherwise a
    directory summary is built and optionally throttled.
    """
    _setup_logging()
    options = _parse_options()
    if options.delete_summaries:
        _delete_summaries(options.path)
    else:
        execute(options.path, options.max_size_KB)


if __name__ == '__main__':
    main()