1# Copyright 2015 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5"""This module provides some tools to interact with LXC containers, for example: 6 1. Download base container from given GS location, setup the base container. 7 2. Create a snapshot as test container from base container. 8 3. Mount a directory in drone to the test container. 9 4. Run a command in the container and return the output. 10 5. Cleanup, e.g., destroy the container. 11 12This tool can also be used to set up a base container for test. For example, 13 python lxc.py -s -p /tmp/container 14This command will download and setup base container in directory /tmp/container. 15After that command finishes, you can run lxc command to work with the base 16container, e.g., 17 lxc-start -P /tmp/container -n base -d 18 lxc-attach -P /tmp/container -n base 19""" 20 21 22import argparse 23import logging 24import os 25import re 26import socket 27import sys 28import time 29 30import common 31from autotest_lib.client.bin import utils 32from autotest_lib.client.common_lib import error 33from autotest_lib.client.common_lib import global_config 34from autotest_lib.client.common_lib.cros import dev_server 35from autotest_lib.client.common_lib.cros import retry 36from autotest_lib.client.common_lib.cros.graphite import autotest_es 37from autotest_lib.server import utils as server_utils 38from autotest_lib.site_utils import lxc_config 39from autotest_lib.site_utils import lxc_utils 40 41try: 42 from chromite.lib import metrics 43except ImportError: 44 metrics = utils.metrics_mock 45 46 47config = global_config.global_config 48 49# Name of the base container. 50BASE = config.get_config_value('AUTOSERV', 'container_base_name') 51# Naming convention of test container, e.g., test_300_1422862512_2424, where: 52# 300: The test job ID. 53# 1422862512: The tick when container is created. 54# 2424: The PID of autoserv that starts the container. 55TEST_CONTAINER_NAME_FMT = 'test_%s_%d_%d' 56# Naming convention of the result directory in test container. 57RESULT_DIR_FMT = os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR, 'results', 58 '%s') 59# Attributes to retrieve about containers. 60ATTRIBUTES = ['name', 'state'] 61 62# Format for mount entry to share a directory in host with container. 63# source is the directory in host, destination is the directory in container. 64# readonly is a binding flag for readonly mount, its value should be `,ro`. 65MOUNT_FMT = ('lxc.mount.entry = %(source)s %(destination)s none ' 66 'bind%(readonly)s 0 0') 67SSP_ENABLED = config.get_config_value('AUTOSERV', 'enable_ssp_container', 68 type=bool, default=True) 69# url to the folder stores base container. 70CONTAINER_BASE_FOLDER_URL = config.get_config_value('AUTOSERV', 71 'container_base_folder_url') 72CONTAINER_BASE_URL_FMT = '%s/%%s.tar.xz' % CONTAINER_BASE_FOLDER_URL 73CONTAINER_BASE_URL = CONTAINER_BASE_URL_FMT % BASE 74# Default directory used to store LXC containers. 75DEFAULT_CONTAINER_PATH = config.get_config_value('AUTOSERV', 'container_path') 76 77# Path to drone_temp folder in the container, which stores the control file for 78# test job to run. 79CONTROL_TEMP_PATH = os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR, 'drone_tmp') 80 81# Bash command to return the file count in a directory. Test the existence first 82# so the command can return an error code if the directory doesn't exist. 83COUNT_FILE_CMD = '[ -d %(dir)s ] && ls %(dir)s | wc -l' 84 85# Command line to append content to a file 86APPEND_CMD_FMT = ('echo \'%(content)s\' | sudo tee --append %(file)s' 87 '> /dev/null') 88 89# Path to site-packates in Moblab 90MOBLAB_SITE_PACKAGES = '/usr/lib64/python2.7/site-packages' 91MOBLAB_SITE_PACKAGES_CONTAINER = '/usr/local/lib/python2.7/dist-packages/' 92 93# Flag to indicate it's running in a Moblab. Due to crbug.com/457496, lxc-ls has 94# different behavior in Moblab. 95IS_MOBLAB = utils.is_moblab() 96 97# TODO(dshi): If we are adding more logic in how lxc should interact with 98# different systems, we should consider code refactoring to use a setting-style 99# object to store following flags mapping to different systems. 100# TODO(crbug.com/464834): Snapshot clone is disabled until Moblab can 101# support overlayfs or aufs, which requires a newer kernel. 102SUPPORT_SNAPSHOT_CLONE = not IS_MOBLAB 103 104# Number of seconds to wait for network to be up in a container. 105NETWORK_INIT_TIMEOUT = 300 106# Network bring up is slower in Moblab. 107NETWORK_INIT_CHECK_INTERVAL = 2 if IS_MOBLAB else 0.1 108 109# Type string for container related metadata. 110CONTAINER_CREATE_METADB_TYPE = 'container_create' 111CONTAINER_CREATE_RETRY_METADB_TYPE = 'container_create_retry' 112CONTAINER_RUN_TEST_METADB_TYPE = 'container_run_test' 113 114# The container's hostname MUST start with `test_`. DHCP server in MobLab uses 115# that prefix to determine the lease time. 116CONTAINER_UTSNAME_FORMAT = 'test_%s' 117 118STATS_KEY = 'chromeos/autotest/lxc' 119 120 121def _get_container_info_moblab(container_path, **filters): 122 """Get a collection of container information in the given container path 123 in a Moblab. 124 125 TODO(crbug.com/457496): remove this method once python 3 can be installed 126 in Moblab and lxc-ls command can use python 3 code. 127 128 When running in Moblab, lxc-ls behaves differently from a server with python 129 3 installed: 130 1. lxc-ls returns a list of containers installed under /etc/lxc, the default 131 lxc container directory. 132 2. lxc-ls --active lists all active containers, regardless where the 133 container is located. 134 For such differences, we have to special case Moblab to make the behavior 135 close to a server with python 3 installed. That is, 136 1. List only containers in a given folder. 137 2. Assume all active containers have state of RUNNING. 138 139 @param container_path: Path to look for containers. 140 @param filters: Key value to filter the containers, e.g., name='base' 141 142 @return: A list of dictionaries that each dictionary has the information of 143 a container. The keys are defined in ATTRIBUTES. 144 """ 145 info_collection = [] 146 active_containers = utils.run('sudo lxc-ls --active').stdout.split() 147 name_filter = filters.get('name', None) 148 state_filter = filters.get('state', None) 149 if filters and set(filters.keys()) - set(['name', 'state']): 150 raise error.ContainerError('When running in Moblab, container list ' 151 'filter only supports name and state.') 152 153 for name in os.listdir(container_path): 154 # Skip all files and folders without rootfs subfolder. 155 if (os.path.isfile(os.path.join(container_path, name)) or 156 not lxc_utils.path_exists(os.path.join(container_path, name, 157 'rootfs'))): 158 continue 159 info = {'name': name, 160 'state': 'RUNNING' if name in active_containers else 'STOPPED' 161 } 162 if ((name_filter and name_filter != info['name']) or 163 (state_filter and state_filter != info['state'])): 164 continue 165 166 info_collection.append(info) 167 return info_collection 168 169 170def get_container_info(container_path, **filters): 171 """Get a collection of container information in the given container path. 172 173 This method parse the output of lxc-ls to get a list of container 174 information. The lxc-ls command output looks like: 175 NAME STATE IPV4 IPV6 AUTOSTART PID MEMORY RAM SWAP 176 -------------------------------------------------------------------------- 177 base STOPPED - - NO - - - - 178 test_123 RUNNING 10.0.3.27 - NO 8359 6.28MB 6.28MB 0.0MB 179 180 @param container_path: Path to look for containers. 181 @param filters: Key value to filter the containers, e.g., name='base' 182 183 @return: A list of dictionaries that each dictionary has the information of 184 a container. The keys are defined in ATTRIBUTES. 185 """ 186 if IS_MOBLAB: 187 return _get_container_info_moblab(container_path, **filters) 188 189 cmd = 'sudo lxc-ls -P %s -f -F %s' % (os.path.realpath(container_path), 190 ','.join(ATTRIBUTES)) 191 output = utils.run(cmd).stdout 192 info_collection = [] 193 194 for line in output.splitlines()[1:]: 195 # Only LXC 1.x has the second line of '-' as a separator. 196 if line.startswith('------'): 197 continue 198 info_collection.append(dict(zip(ATTRIBUTES, line.split()))) 199 if filters: 200 filtered_collection = [] 201 for key, value in filters.iteritems(): 202 for info in info_collection: 203 if key in info and info[key] == value: 204 filtered_collection.append(info) 205 info_collection = filtered_collection 206 return info_collection 207 208 209def cleanup_if_fail(): 210 """Decorator to do cleanup if container fails to be set up. 211 """ 212 def deco_cleanup_if_fail(func): 213 """Wrapper for the decorator. 214 215 @param func: Function to be called. 216 """ 217 def func_cleanup_if_fail(*args, **kwargs): 218 """Decorator to do cleanup if container fails to be set up. 219 220 The first argument must be a ContainerBucket object, which can be 221 used to retrieve the container object by name. 222 223 @param func: function to be called. 224 @param args: arguments for function to be called. 225 @param kwargs: keyword arguments for function to be called. 226 """ 227 bucket = args[0] 228 name = utils.get_function_arg_value(func, 'name', args, kwargs) 229 try: 230 skip_cleanup = utils.get_function_arg_value( 231 func, 'skip_cleanup', args, kwargs) 232 except (KeyError, ValueError): 233 skip_cleanup = False 234 try: 235 return func(*args, **kwargs) 236 except: 237 exc_info = sys.exc_info() 238 try: 239 container = bucket.get(name) 240 if container and not skip_cleanup: 241 container.destroy() 242 except error.CmdError as e: 243 logging.error(e) 244 245 try: 246 job_id = utils.get_function_arg_value( 247 func, 'job_id', args, kwargs) 248 except (KeyError, ValueError): 249 job_id = '' 250 metadata={'drone': socket.gethostname(), 251 'job_id': job_id, 252 'success': False} 253 # Record all args if job_id is not available. 254 if not job_id: 255 metadata['args'] = str(args) 256 if kwargs: 257 metadata.update(kwargs) 258 autotest_es.post(use_http=True, 259 type_str=CONTAINER_CREATE_METADB_TYPE, 260 metadata=metadata) 261 262 # Raise the cached exception with original backtrace. 263 raise exc_info[0], exc_info[1], exc_info[2] 264 return func_cleanup_if_fail 265 return deco_cleanup_if_fail 266 267 268@retry.retry(error.CmdError, timeout_min=5) 269def download_extract(url, target, extract_dir): 270 """Download the file from given url and save it to the target, then extract. 271 272 @param url: Url to download the file. 273 @param target: Path of the file to save to. 274 @param extract_dir: Directory to extract the content of the file to. 275 """ 276 remote_url = dev_server.DevServer.get_server_url(url) 277 # TODO(xixuan): Better to only ssh to devservers in lab, and continue using 278 # wget for ganeti devservers. 279 if remote_url in dev_server.ImageServerBase.servers(): 280 tmp_file = '/tmp/%s' % os.path.basename(target) 281 dev_server.ImageServerBase.download_file(url, tmp_file, timeout=300) 282 utils.run('sudo mv %s %s' % (tmp_file, target)) 283 else: 284 utils.run('sudo wget --timeout=300 -nv %s -O %s' % (url, target), 285 stderr_tee=utils.TEE_TO_LOGS) 286 287 utils.run('sudo tar -xvf %s -C %s' % (target, extract_dir)) 288 289 290def install_package_precheck(packages): 291 """If SSP is not enabled or the test is running in chroot (using test_that), 292 packages installation should be skipped. 293 294 The check does not raise exception so tests started by test_that or running 295 in an Autotest setup with SSP disabled can continue. That assume the running 296 environment, chroot or a machine, has the desired packages installed 297 already. 298 299 @param packages: A list of names of the packages to install. 300 301 @return: True if package installation can continue. False if it should be 302 skipped. 303 304 """ 305 if not SSP_ENABLED and not utils.is_in_container(): 306 logging.info('Server-side packaging is not enabled. Install package %s ' 307 'is skipped.', packages) 308 return False 309 310 if server_utils.is_inside_chroot(): 311 logging.info('Test is running inside chroot. Install package %s is ' 312 'skipped.', packages) 313 return False 314 315 if not utils.is_in_container(): 316 raise error.ContainerError('Package installation is only supported ' 317 'when test is running inside container.') 318 319 return True 320 321 322@metrics.SecondsTimerDecorator('%s/install_packages_duration' % STATS_KEY) 323@retry.retry(error.CmdError, timeout_min=30) 324def install_packages(packages=[], python_packages=[], force_latest=False): 325 """Install the given package inside container. 326 327 !!! WARNING !!! 328 This call may introduce several minutes of delay in test run. The best way 329 to avoid such delay is to update the base container used for the test run. 330 File a bug for infra deputy to update the base container with the new 331 package a test requires. 332 333 @param packages: A list of names of the packages to install. 334 @param python_packages: A list of names of the python packages to install 335 using pip. 336 @param force_latest: True to force to install the latest version of the 337 package. Default to False, which means skip installing 338 the package if it's installed already, even with an old 339 version. 340 341 @raise error.ContainerError: If package is attempted to be installed outside 342 a container. 343 @raise error.CmdError: If the package doesn't exist or failed to install. 344 345 """ 346 if not install_package_precheck(packages or python_packages): 347 return 348 349 # If force_latest is False, only install packages that are not already 350 # installed. 351 if not force_latest: 352 packages = [p for p in packages if not utils.is_package_installed(p)] 353 python_packages = [p for p in python_packages 354 if not utils.is_python_package_installed(p)] 355 if not packages and not python_packages: 356 logging.debug('All packages are installed already, skip reinstall.') 357 return 358 359 # Always run apt-get update before installing any container. The base 360 # container may have outdated cache. 361 utils.run('sudo apt-get update') 362 # Make sure the lists are not None for iteration. 363 packages = [] if not packages else packages 364 if python_packages: 365 packages.extend(['python-pip', 'python-dev']) 366 if packages: 367 utils.run('sudo apt-get install %s -y --force-yes' % ' '.join(packages)) 368 logging.debug('Packages are installed: %s.', packages) 369 370 target_setting = '' 371 # For containers running in Moblab, /usr/local/lib/python2.7/dist-packages/ 372 # is a readonly mount from the host. Therefore, new python modules have to 373 # be installed in /usr/lib/python2.7/dist-packages/ 374 # Containers created in Moblab does not have autotest/site-packages folder. 375 if not os.path.exists('/usr/local/autotest/site-packages'): 376 target_setting = '--target="/usr/lib/python2.7/dist-packages/"' 377 if python_packages: 378 utils.run('sudo pip install %s %s' % (target_setting, 379 ' '.join(python_packages))) 380 logging.debug('Python packages are installed: %s.', python_packages) 381 382 383@retry.retry(error.CmdError, timeout_min=20) 384def install_package(package): 385 """Install the given package inside container. 386 387 This function is kept for backwards compatibility reason. New code should 388 use function install_packages for better performance. 389 390 @param package: Name of the package to install. 391 392 @raise error.ContainerError: If package is attempted to be installed outside 393 a container. 394 @raise error.CmdError: If the package doesn't exist or failed to install. 395 396 """ 397 logging.warn('This function is obsoleted, please use install_packages ' 398 'instead.') 399 install_packages(packages=[package]) 400 401 402@retry.retry(error.CmdError, timeout_min=20) 403def install_python_package(package): 404 """Install the given python package inside container using pip. 405 406 This function is kept for backwards compatibility reason. New code should 407 use function install_packages for better performance. 408 409 @param package: Name of the python package to install. 410 411 @raise error.CmdError: If the package doesn't exist or failed to install. 412 """ 413 logging.warn('This function is obsoleted, please use install_packages ' 414 'instead.') 415 install_packages(python_packages=[package]) 416 417 418class Container(object): 419 """A wrapper class of an LXC container. 420 421 The wrapper class provides methods to interact with a container, e.g., 422 start, stop, destroy, run a command. It also has attributes of the 423 container, including: 424 name: Name of the container. 425 state: State of the container, e.g., ABORTING, RUNNING, STARTING, STOPPED, 426 or STOPPING. 427 428 lxc-ls can also collect other attributes of a container including: 429 ipv4: IP address for IPv4. 430 ipv6: IP address for IPv6. 431 autostart: If the container will autostart at system boot. 432 pid: Process ID of the container. 433 memory: Memory used by the container, as a string, e.g., "6.2MB" 434 ram: Physical ram used by the container, as a string, e.g., "6.2MB" 435 swap: swap used by the container, as a string, e.g., "1.0MB" 436 437 For performance reason, such info is not collected for now. 438 439 The attributes available are defined in ATTRIBUTES constant. 440 """ 441 442 def __init__(self, container_path, attribute_values): 443 """Initialize an object of LXC container with given attribute values. 444 445 @param container_path: Directory that stores the container. 446 @param attribute_values: A dictionary of attribute values for the 447 container. 448 """ 449 self.container_path = os.path.realpath(container_path) 450 # Path to the rootfs of the container. This will be initialized when 451 # property rootfs is retrieved. 452 self._rootfs = None 453 for attribute, value in attribute_values.iteritems(): 454 setattr(self, attribute, value) 455 456 457 def refresh_status(self): 458 """Refresh the status information of the container. 459 """ 460 containers = get_container_info(self.container_path, name=self.name) 461 if not containers: 462 raise error.ContainerError( 463 'No container found in directory %s with name of %s.' % 464 self.container_path, self.name) 465 attribute_values = containers[0] 466 for attribute, value in attribute_values.iteritems(): 467 setattr(self, attribute, value) 468 469 470 @property 471 def rootfs(self): 472 """Path to the rootfs of the container. 473 474 This property returns the path to the rootfs of the container, that is, 475 the folder where the container stores its local files. It reads the 476 attribute lxc.rootfs from the config file of the container, e.g., 477 lxc.rootfs = /usr/local/autotest/containers/t4/rootfs 478 If the container is created with snapshot, the rootfs is a chain of 479 folders, separated by `:` and ordered by how the snapshot is created, 480 e.g., 481 lxc.rootfs = overlayfs:/usr/local/autotest/containers/base/rootfs: 482 /usr/local/autotest/containers/t4_s/delta0 483 This function returns the last folder in the chain, in above example, 484 that is `/usr/local/autotest/containers/t4_s/delta0` 485 486 Files in the rootfs will be accessible directly within container. For 487 example, a folder in host "[rootfs]/usr/local/file1", can be accessed 488 inside container by path "/usr/local/file1". Note that symlink in the 489 host can not across host/container boundary, instead, directory mount 490 should be used, refer to function mount_dir. 491 492 @return: Path to the rootfs of the container. 493 """ 494 if not self._rootfs: 495 cmd = ('sudo lxc-info -P %s -n %s -c lxc.rootfs' % 496 (self.container_path, self.name)) 497 lxc_rootfs_config = utils.run(cmd).stdout.strip() 498 match = re.match('lxc.rootfs = (.*)', lxc_rootfs_config) 499 if not match: 500 raise error.ContainerError( 501 'Failed to locate rootfs for container %s. lxc.rootfs ' 502 'in the container config file is %s' % 503 (self.name, lxc_rootfs_config)) 504 lxc_rootfs = match.group(1) 505 self.clone_from_snapshot = ':' in lxc_rootfs 506 if self.clone_from_snapshot: 507 self._rootfs = lxc_rootfs.split(':')[-1] 508 else: 509 self._rootfs = lxc_rootfs 510 return self._rootfs 511 512 513 def attach_run(self, command, bash=True): 514 """Attach to a given container and run the given command. 515 516 @param command: Command to run in the container. 517 @param bash: Run the command through bash -c "command". This allows 518 pipes to be used in command. Default is set to True. 519 520 @return: The output of the command. 521 522 @raise error.CmdError: If container does not exist, or not running. 523 """ 524 cmd = 'sudo lxc-attach -P %s -n %s' % (self.container_path, self.name) 525 if bash and not command.startswith('bash -c'): 526 command = 'bash -c "%s"' % utils.sh_escape(command) 527 cmd += ' -- %s' % command 528 # TODO(dshi): crbug.com/459344 Set sudo to default to False when test 529 # container can be unprivileged container. 530 return utils.run(cmd) 531 532 533 def is_network_up(self): 534 """Check if network is up in the container by curl base container url. 535 536 @return: True if the network is up, otherwise False. 537 """ 538 try: 539 self.attach_run('curl --head %s' % CONTAINER_BASE_URL) 540 return True 541 except error.CmdError as e: 542 logging.debug(e) 543 return False 544 545 546 @metrics.SecondsTimerDecorator('%s/container_start_duration' % STATS_KEY) 547 def start(self, wait_for_network=True): 548 """Start the container. 549 550 @param wait_for_network: True to wait for network to be up. Default is 551 set to True. 552 553 @raise ContainerError: If container does not exist, or fails to start. 554 """ 555 cmd = 'sudo lxc-start -P %s -n %s -d' % (self.container_path, self.name) 556 output = utils.run(cmd).stdout 557 self.refresh_status() 558 if self.state != 'RUNNING': 559 raise error.ContainerError( 560 'Container %s failed to start. lxc command output:\n%s' % 561 (os.path.join(self.container_path, self.name), 562 output)) 563 564 if wait_for_network: 565 logging.debug('Wait for network to be up.') 566 start_time = time.time() 567 utils.poll_for_condition(condition=self.is_network_up, 568 timeout=NETWORK_INIT_TIMEOUT, 569 sleep_interval=NETWORK_INIT_CHECK_INTERVAL) 570 logging.debug('Network is up after %.2f seconds.', 571 time.time() - start_time) 572 573 574 @metrics.SecondsTimerDecorator('%s/container_stop_duration' % STATS_KEY) 575 def stop(self): 576 """Stop the container. 577 578 @raise ContainerError: If container does not exist, or fails to start. 579 """ 580 cmd = 'sudo lxc-stop -P %s -n %s' % (self.container_path, self.name) 581 output = utils.run(cmd).stdout 582 self.refresh_status() 583 if self.state != 'STOPPED': 584 raise error.ContainerError( 585 'Container %s failed to be stopped. lxc command output:\n' 586 '%s' % (os.path.join(self.container_path, self.name), 587 output)) 588 589 590 @metrics.SecondsTimerDecorator('%s/container_destroy_duration' % STATS_KEY) 591 def destroy(self, force=True): 592 """Destroy the container. 593 594 @param force: Set to True to force to destroy the container even if it's 595 running. This is faster than stop a container first then 596 try to destroy it. Default is set to True. 597 598 @raise ContainerError: If container does not exist or failed to destroy 599 the container. 600 """ 601 cmd = 'sudo lxc-destroy -P %s -n %s' % (self.container_path, 602 self.name) 603 if force: 604 cmd += ' -f' 605 utils.run(cmd) 606 607 608 def mount_dir(self, source, destination, readonly=False): 609 """Mount a directory in host to a directory in the container. 610 611 @param source: Directory in host to be mounted. 612 @param destination: Directory in container to mount the source directory 613 @param readonly: Set to True to make a readonly mount, default is False. 614 """ 615 # Destination path in container must be relative. 616 destination = destination.lstrip('/') 617 # Create directory in container for mount. 618 utils.run('sudo mkdir -p %s' % os.path.join(self.rootfs, destination)) 619 config_file = os.path.join(self.container_path, self.name, 'config') 620 mount = MOUNT_FMT % {'source': source, 621 'destination': destination, 622 'readonly': ',ro' if readonly else ''} 623 utils.run(APPEND_CMD_FMT % {'content': mount, 'file': config_file}) 624 625 626 def verify_autotest_setup(self, job_folder): 627 """Verify autotest code is set up properly in the container. 628 629 @param job_folder: Name of the job result folder. 630 631 @raise ContainerError: If autotest code is not set up properly. 632 """ 633 # Test autotest code is setup by verifying a list of 634 # (directory, minimum file count) 635 if IS_MOBLAB: 636 site_packages_path = MOBLAB_SITE_PACKAGES_CONTAINER 637 else: 638 site_packages_path = os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR, 639 'site-packages') 640 directories_to_check = [ 641 (lxc_config.CONTAINER_AUTOTEST_DIR, 3), 642 (RESULT_DIR_FMT % job_folder, 0), 643 (site_packages_path, 3)] 644 for directory, count in directories_to_check: 645 result = self.attach_run(command=(COUNT_FILE_CMD % 646 {'dir': directory})).stdout 647 logging.debug('%s entries in %s.', int(result), directory) 648 if int(result) < count: 649 raise error.ContainerError('%s is not properly set up.' % 650 directory) 651 # lxc-attach and run command does not run in shell, thus .bashrc is not 652 # loaded. Following command creates a symlink in /usr/bin/ for gsutil 653 # if it's installed. 654 # TODO(dshi): Remove this code after lab container is updated with 655 # gsutil installed in /usr/bin/ 656 self.attach_run('test -f /root/gsutil/gsutil && ' 657 'ln -s /root/gsutil/gsutil /usr/bin/gsutil || true') 658 659 660 def modify_import_order(self): 661 """Swap the python import order of lib and local/lib. 662 663 In Moblab, the host's python modules located in 664 /usr/lib64/python2.7/site-packages is mounted to following folder inside 665 container: /usr/local/lib/python2.7/dist-packages/. The modules include 666 an old version of requests module, which is used in autotest 667 site-packages. For test, the module is only used in 668 dev_server/symbolicate_dump for requests.call and requests.codes.OK. 669 When pip is installed inside the container, it installs requests module 670 with version of 2.2.1 in /usr/lib/python2.7/dist-packages/. The version 671 is newer than the one used in autotest site-packages, but not the latest 672 either. 673 According to /usr/lib/python2.7/site.py, modules in /usr/local/lib are 674 imported before the ones in /usr/lib. That leads to pip to use the older 675 version of requests (0.11.2), and it will fail. On the other hand, 676 requests module 2.2.1 can't be installed in CrOS (refer to CL:265759), 677 and higher version of requests module can't work with pip. 678 The only fix to resolve this is to switch the import order, so modules 679 in /usr/lib can be imported before /usr/local/lib. 680 """ 681 site_module = '/usr/lib/python2.7/site.py' 682 self.attach_run("sed -i ':a;N;$!ba;s/\"local\/lib\",\\n/" 683 "\"lib_placeholder\",\\n/g' %s" % site_module) 684 self.attach_run("sed -i ':a;N;$!ba;s/\"lib\",\\n/" 685 "\"local\/lib\",\\n/g' %s" % site_module) 686 self.attach_run('sed -i "s/lib_placeholder/lib/g" %s' % 687 site_module) 688 689 690 691class ContainerBucket(object): 692 """A wrapper class to interact with containers in a specific container path. 693 """ 694 695 def __init__(self, container_path=DEFAULT_CONTAINER_PATH): 696 """Initialize a ContainerBucket. 697 698 @param container_path: Path to the directory used to store containers. 699 Default is set to AUTOSERV/container_path in 700 global config. 701 """ 702 self.container_path = os.path.realpath(container_path) 703 704 705 def get_all(self): 706 """Get details of all containers. 707 708 @return: A dictionary of all containers with detailed attributes, 709 indexed by container name. 710 """ 711 info_collection = get_container_info(self.container_path) 712 containers = {} 713 for info in info_collection: 714 container = Container(self.container_path, info) 715 containers[container.name] = container 716 return containers 717 718 719 def get(self, name): 720 """Get a container with matching name. 721 722 @param name: Name of the container. 723 724 @return: A container object with matching name. Returns None if no 725 container matches the given name. 726 """ 727 return self.get_all().get(name, None) 728 729 730 def exist(self, name): 731 """Check if a container exists with the given name. 732 733 @param name: Name of the container. 734 735 @return: True if the container with the given name exists, otherwise 736 returns False. 737 """ 738 return self.get(name) != None 739 740 741 def destroy_all(self): 742 """Destroy all containers, base must be destroyed at the last. 743 """ 744 containers = self.get_all().values() 745 for container in sorted(containers, 746 key=lambda n: 1 if n.name == BASE else 0): 747 logging.info('Destroy container %s.', container.name) 748 container.destroy() 749 750 751 @metrics.SecondsTimerDecorator('%s/create_from_base_duration' % STATS_KEY) 752 def create_from_base(self, name, disable_snapshot_clone=False, 753 force_cleanup=False): 754 """Create a container from the base container. 755 756 @param name: Name of the container. 757 @param disable_snapshot_clone: Set to True to force to clone without 758 using snapshot clone even if the host supports that. 759 @param force_cleanup: Force to cleanup existing container. 760 761 @return: A Container object for the created container. 762 763 @raise ContainerError: If the container already exist. 764 @raise error.CmdError: If lxc-clone call failed for any reason. 765 """ 766 if self.exist(name) and not force_cleanup: 767 raise error.ContainerError('Container %s already exists.' % name) 768 769 # Cleanup existing container with the given name. 770 container_folder = os.path.join(self.container_path, name) 771 if lxc_utils.path_exists(container_folder) and force_cleanup: 772 container = Container(self.container_path, {'name': name}) 773 try: 774 container.destroy() 775 except error.CmdError as e: 776 # The container could be created in a incompleted state. Delete 777 # the container folder instead. 778 logging.warn('Failed to destroy container %s, error: %s', 779 name, e) 780 utils.run('sudo rm -rf "%s"' % container_folder) 781 782 use_snapshot = SUPPORT_SNAPSHOT_CLONE and not disable_snapshot_clone 783 snapshot = '-s' if use_snapshot else '' 784 # overlayfs is the default clone backend storage. However it is not 785 # supported in Ganeti yet. Use aufs as the alternative. 786 aufs = '-B aufs' if utils.is_vm() and use_snapshot else '' 787 cmd = ('sudo lxc-clone -p %s -P %s %s' % 788 (self.container_path, self.container_path, 789 ' '.join([BASE, name, snapshot, aufs]))) 790 try: 791 utils.run(cmd) 792 return self.get(name) 793 except error.CmdError: 794 if not use_snapshot: 795 raise 796 else: 797 # Snapshot clone failed, retry clone without snapshot. The retry 798 # won't hit the code here and cause an infinite loop as 799 # disable_snapshot_clone is set to True. 800 container = self.create_from_base( 801 name, disable_snapshot_clone=True, force_cleanup=True) 802 # Report metadata about retry success. 803 autotest_es.post(use_http=True, 804 type_str=CONTAINER_CREATE_RETRY_METADB_TYPE, 805 metadata={'drone': socket.gethostname(), 806 'name': name, 807 'success': True}) 808 return container 809 810 811 @cleanup_if_fail() 812 def setup_base(self, name=BASE, force_delete=False): 813 """Setup base container. 814 815 @param name: Name of the base container, default to base. 816 @param force_delete: True to force to delete existing base container. 817 This action will destroy all running test 818 containers. Default is set to False. 819 """ 820 if not self.container_path: 821 raise error.ContainerError( 822 'You must set a valid directory to store containers in ' 823 'global config "AUTOSERV/ container_path".') 824 825 if not os.path.exists(self.container_path): 826 os.makedirs(self.container_path) 827 828 base_path = os.path.join(self.container_path, name) 829 if self.exist(name) and not force_delete: 830 logging.error( 831 'Base container already exists. Set force_delete to True ' 832 'to force to re-stage base container. Note that this ' 833 'action will destroy all running test containers') 834 # Set proper file permission. base container in moblab may have 835 # owner of not being root. Force to update the folder's owner. 836 # TODO(dshi): Change root to current user when test container can be 837 # unprivileged container. 838 utils.run('sudo chown -R root "%s"' % base_path) 839 utils.run('sudo chgrp -R root "%s"' % base_path) 840 return 841 842 # Destroy existing base container if exists. 843 if self.exist(name): 844 # TODO: We may need to destroy all snapshots created from this base 845 # container, not all container. 846 self.destroy_all() 847 848 # Download and untar the base container. 849 tar_path = os.path.join(self.container_path, '%s.tar.xz' % name) 850 path_to_cleanup = [tar_path, base_path] 851 for path in path_to_cleanup: 852 if os.path.exists(path): 853 utils.run('sudo rm -rf "%s"' % path) 854 container_url = CONTAINER_BASE_URL_FMT % name 855 download_extract(container_url, tar_path, self.container_path) 856 # Remove the downloaded container tar file. 857 utils.run('sudo rm "%s"' % tar_path) 858 # Set proper file permission. 859 # TODO(dshi): Change root to current user when test container can be 860 # unprivileged container. 861 utils.run('sudo chown -R root "%s"' % base_path) 862 utils.run('sudo chgrp -R root "%s"' % base_path) 863 864 # Update container config with container_path from global config. 865 config_path = os.path.join(base_path, 'config') 866 utils.run('sudo sed -i "s|container_dir|%s|g" "%s"' % 867 (self.container_path, config_path)) 868 869 870 @metrics.SecondsTimerDecorator('%s/setup_test_duration' % STATS_KEY) 871 @cleanup_if_fail() 872 def setup_test(self, name, job_id, server_package_url, result_path, 873 control=None, skip_cleanup=False, job_folder=None, 874 dut_name=None): 875 """Setup test container for the test job to run. 876 877 The setup includes: 878 1. Install autotest_server package from given url. 879 2. Copy over local shadow_config.ini. 880 3. Mount local site-packages. 881 4. Mount test result directory. 882 883 TODO(dshi): Setup also needs to include test control file for autoserv 884 to run in container. 885 886 @param name: Name of the container. 887 @param job_id: Job id for the test job to run in the test container. 888 @param server_package_url: Url to download autotest_server package. 889 @param result_path: Directory to be mounted to container to store test 890 results. 891 @param control: Path to the control file to run the test job. Default is 892 set to None. 893 @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot 894 container failures. 895 @param job_folder: Folder name of the job, e.g., 123-debug_user. 896 @param dut_name: Name of the dut to run test, used as the hostname of 897 the container. Default is None. 898 @return: A Container object for the test container. 899 900 @raise ContainerError: If container does not exist, or not running. 901 """ 902 start_time = time.time() 903 904 if not os.path.exists(result_path): 905 raise error.ContainerError('Result directory does not exist: %s', 906 result_path) 907 result_path = os.path.abspath(result_path) 908 909 # Save control file to result_path temporarily. The reason is that the 910 # control file in drone_tmp folder can be deleted during scheduler 911 # restart. For test not using SSP, the window between test starts and 912 # control file being picked up by the test is very small (< 2 seconds). 913 # However, for tests using SSP, it takes around 1 minute before the 914 # container is setup. If scheduler is restarted during that period, the 915 # control file will be deleted, and the test will fail. 916 if control: 917 control_file_name = os.path.basename(control) 918 safe_control = os.path.join(result_path, control_file_name) 919 utils.run('cp %s %s' % (control, safe_control)) 920 921 # Create test container from the base container. 922 container = self.create_from_base(name) 923 924 # Update the hostname of the test container to be `dut_name`. 925 # Some TradeFed tests use hostname in test results, which is used to 926 # group test results in dashboard. The default container name is set to 927 # be the name of the folder, which is unique (as it is composed of job 928 # id and timestamp. For better result view, the container's hostname is 929 # set to be a string containing the dut hostname. 930 if dut_name: 931 config_file = os.path.join(container.container_path, name, 'config') 932 lxc_utsname_setting = ( 933 'lxc.utsname = ' + 934 CONTAINER_UTSNAME_FORMAT % dut_name.replace('.', '_')) 935 utils.run(APPEND_CMD_FMT % {'content': lxc_utsname_setting, 936 'file': config_file}) 937 938 # Deploy server side package 939 usr_local_path = os.path.join(container.rootfs, 'usr', 'local') 940 autotest_pkg_path = os.path.join(usr_local_path, 941 'autotest_server_package.tar.bz2') 942 autotest_path = os.path.join(usr_local_path, 'autotest') 943 # sudo is required so os.makedirs may not work. 944 utils.run('sudo mkdir -p %s'% usr_local_path) 945 946 download_extract(server_package_url, autotest_pkg_path, usr_local_path) 947 deploy_config_manager = lxc_config.DeployConfigManager(container) 948 deploy_config_manager.deploy_pre_start() 949 950 # Copy over control file to run the test job. 951 if control: 952 container_drone_temp = os.path.join(autotest_path, 'drone_tmp') 953 utils.run('sudo mkdir -p %s'% container_drone_temp) 954 container_control_file = os.path.join( 955 container_drone_temp, control_file_name) 956 # Move the control file stored in the result folder to container. 957 utils.run('sudo mv %s %s' % (safe_control, container_control_file)) 958 959 if IS_MOBLAB: 960 site_packages_path = MOBLAB_SITE_PACKAGES 961 site_packages_container_path = MOBLAB_SITE_PACKAGES_CONTAINER[1:] 962 else: 963 site_packages_path = os.path.join(common.autotest_dir, 964 'site-packages') 965 site_packages_container_path = os.path.join( 966 lxc_config.CONTAINER_AUTOTEST_DIR, 'site-packages') 967 mount_entries = [(site_packages_path, site_packages_container_path, 968 True), 969 (os.path.join(common.autotest_dir, 'puppylab'), 970 os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR, 971 'puppylab'), 972 True), 973 (result_path, 974 os.path.join(RESULT_DIR_FMT % job_folder), 975 False), 976 ] 977 for mount_config in deploy_config_manager.mount_configs: 978 mount_entries.append((mount_config.source, mount_config.target, 979 mount_config.readonly)) 980 # Update container config to mount directories. 981 for source, destination, readonly in mount_entries: 982 container.mount_dir(source, destination, readonly) 983 984 # Update file permissions. 985 # TODO(dshi): crbug.com/459344 Skip following action when test container 986 # can be unprivileged container. 987 utils.run('sudo chown -R root "%s"' % autotest_path) 988 utils.run('sudo chgrp -R root "%s"' % autotest_path) 989 990 container.start(name) 991 deploy_config_manager.deploy_post_start() 992 993 container.modify_import_order() 994 995 container.verify_autotest_setup(job_folder) 996 997 autotest_es.post(use_http=True, 998 type_str=CONTAINER_CREATE_METADB_TYPE, 999 metadata={'drone': socket.gethostname(), 1000 'job_id': job_id, 1001 'time_used': time.time() - start_time, 1002 'success': True}) 1003 1004 logging.debug('Test container %s is set up.', name) 1005 return container 1006 1007 1008def parse_options(): 1009 """Parse command line inputs. 1010 1011 @raise argparse.ArgumentError: If command line arguments are invalid. 1012 """ 1013 parser = argparse.ArgumentParser() 1014 parser.add_argument('-s', '--setup', action='store_true', 1015 default=False, 1016 help='Set up base container.') 1017 parser.add_argument('-p', '--path', type=str, 1018 help='Directory to store the container.', 1019 default=DEFAULT_CONTAINER_PATH) 1020 parser.add_argument('-f', '--force_delete', action='store_true', 1021 default=False, 1022 help=('Force to delete existing containers and rebuild ' 1023 'base containers.')) 1024 parser.add_argument('-n', '--name', type=str, 1025 help='Name of the base container.', 1026 default=BASE) 1027 options = parser.parse_args() 1028 if not options.setup and not options.force_delete: 1029 raise argparse.ArgumentError( 1030 'Use --setup to setup a base container, or --force_delete to ' 1031 'delete all containers in given path.') 1032 return options 1033 1034 1035def main(): 1036 """main script.""" 1037 # Force to run the setup as superuser. 1038 # TODO(dshi): crbug.com/459344 Set remove this enforcement when test 1039 # container can be unprivileged container. 1040 if utils.sudo_require_password(): 1041 logging.warn('SSP requires root privilege to run commands, please ' 1042 'grant root access to this process.') 1043 utils.run('sudo true') 1044 1045 options = parse_options() 1046 bucket = ContainerBucket(container_path=options.path) 1047 if options.setup: 1048 bucket.setup_base(name=options.name, force_delete=options.force_delete) 1049 elif options.force_delete: 1050 bucket.destroy_all() 1051 1052 1053if __name__ == '__main__': 1054 main() 1055