1# Copyright 2015 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""This module provides some tools to interact with LXC containers, for example:
6  1. Download base container from given GS location, setup the base container.
7  2. Create a snapshot as test container from base container.
8  3. Mount a directory in drone to the test container.
9  4. Run a command in the container and return the output.
10  5. Cleanup, e.g., destroy the container.
11
12This tool can also be used to set up a base container for test. For example,
13  python lxc.py -s -p /tmp/container
14This command will download and setup base container in directory /tmp/container.
15After that command finishes, you can run lxc command to work with the base
16container, e.g.,
17  lxc-start -P /tmp/container -n base -d
18  lxc-attach -P /tmp/container -n base
19"""
20
21
22import argparse
23import logging
24import os
25import re
26import socket
27import sys
28import time
29
30import common
31from autotest_lib.client.bin import utils
32from autotest_lib.client.common_lib import error
33from autotest_lib.client.common_lib import global_config
34from autotest_lib.client.common_lib.cros import dev_server
35from autotest_lib.client.common_lib.cros import retry
36from autotest_lib.client.common_lib.cros.graphite import autotest_es
37from autotest_lib.server import utils as server_utils
38from autotest_lib.site_utils import lxc_config
39from autotest_lib.site_utils import lxc_utils
40
41try:
42    from chromite.lib import metrics
43except ImportError:
44    metrics = utils.metrics_mock
45
46
config = global_config.global_config

# Name of the base container.
BASE = config.get_config_value('AUTOSERV', 'container_base_name')
# Naming convention of test container, e.g., test_300_1422862512_2424, where:
# 300:        The test job ID.
# 1422862512: The tick when container is created.
# 2424:       The PID of autoserv that starts the container.
TEST_CONTAINER_NAME_FMT = 'test_%s_%d_%d'
# Naming convention of the result directory in test container.
RESULT_DIR_FMT = os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR, 'results',
                              '%s')
# Attributes to retrieve about containers.
ATTRIBUTES = ['name', 'state']

# Format for mount entry to share a directory in host with container.
# source is the directory in host, destination is the directory in container.
# readonly is a binding flag for readonly mount, its value should be `,ro`
# for a readonly mount, or an empty string otherwise.
MOUNT_FMT = ('lxc.mount.entry = %(source)s %(destination)s none '
             'bind%(readonly)s 0 0')
# Whether server-side packaging (SSP) containers are enabled. Read from
# AUTOSERV/enable_ssp_container in global config; defaults to True.
SSP_ENABLED = config.get_config_value('AUTOSERV', 'enable_ssp_container',
                                      type=bool, default=True)
# URL of the folder that stores the base container.
CONTAINER_BASE_FOLDER_URL = config.get_config_value('AUTOSERV',
                                                    'container_base_folder_url')
# URL format of a base container tarball; takes the container name.
CONTAINER_BASE_URL_FMT = '%s/%%s.tar.xz' % CONTAINER_BASE_FOLDER_URL
# URL of the tarball for the base container named by BASE.
CONTAINER_BASE_URL = CONTAINER_BASE_URL_FMT % BASE
# Default directory used to store LXC containers.
DEFAULT_CONTAINER_PATH = config.get_config_value('AUTOSERV', 'container_path')

# Path to drone_temp folder in the container, which stores the control file for
# test job to run.
CONTROL_TEMP_PATH = os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR, 'drone_tmp')

# Bash command to return the file count in a directory. Test the existence first
# so the command can return an error code if the directory doesn't exist.
COUNT_FILE_CMD = '[ -d %(dir)s ] && ls %(dir)s | wc -l'

# Command line to append content to a file.
# NOTE(review): the two string literals are joined without a space, so the
# output redirection directly follows the file name in the rendered command.
APPEND_CMD_FMT = ('echo \'%(content)s\' | sudo tee --append %(file)s'
                  '> /dev/null')

# Path to site-packages in Moblab
MOBLAB_SITE_PACKAGES = '/usr/lib64/python2.7/site-packages'
MOBLAB_SITE_PACKAGES_CONTAINER = '/usr/local/lib/python2.7/dist-packages/'

# Flag to indicate it's running in a Moblab. Due to crbug.com/457496, lxc-ls has
# different behavior in Moblab.
IS_MOBLAB = utils.is_moblab()

# TODO(dshi): If we are adding more logic in how lxc should interact with
# different systems, we should consider code refactoring to use a setting-style
# object to store following flags mapping to different systems.
# TODO(crbug.com/464834): Snapshot clone is disabled until Moblab can
# support overlayfs or aufs, which requires a newer kernel.
SUPPORT_SNAPSHOT_CLONE = not IS_MOBLAB

# Number of seconds to wait for network to be up in a container.
NETWORK_INIT_TIMEOUT = 300
# Interval in seconds between network checks. Network bring up is slower in
# Moblab, so a longer interval is used there.
NETWORK_INIT_CHECK_INTERVAL = 2 if IS_MOBLAB else 0.1

# Type string for container related metadata.
CONTAINER_CREATE_METADB_TYPE = 'container_create'
CONTAINER_CREATE_RETRY_METADB_TYPE = 'container_create_retry'
CONTAINER_RUN_TEST_METADB_TYPE = 'container_run_test'

# The container's hostname MUST start with `test_`. DHCP server in MobLab uses
# that prefix to determine the lease time.
CONTAINER_UTSNAME_FORMAT = 'test_%s'

# Prefix of the metric names reported by the timer decorators in this module.
STATS_KEY = 'chromeos/autotest/lxc'
119
120
def _get_container_info_moblab(container_path, **filters):
    """Collect container information for the given path when in a Moblab.

    TODO(crbug.com/457496): remove this method once python 3 can be installed
    in Moblab and lxc-ls command can use python 3 code.

    In Moblab, lxc-ls does not behave like it does on a server with python 3
    installed:
    1. lxc-ls only lists containers installed under /etc/lxc, the default
       lxc container directory.
    2. lxc-ls --active lists every active container, wherever it is located.
    To stay close to the behavior of a server with python 3 installed, this
    function:
    1. Lists only the containers found in the given folder.
    2. Treats every active container as being in state RUNNING, and every
       other container as STOPPED.

    @param container_path: Path to look for containers.
    @param filters: Key value to filter the containers, e.g., name='base'.
                    Only `name` and `state` are supported.

    @return: A list of dictionaries that each dictionary has the information of
             a container. The keys are defined in ATTRIBUTES.

    @raise error.ContainerError: If a filter other than name or state is given.
    """
    running = utils.run('sudo lxc-ls --active').stdout.split()
    wanted_name = filters.get('name', None)
    wanted_state = filters.get('state', None)
    unsupported = set(filters.keys()) - set(['name', 'state'])
    if unsupported:
        raise error.ContainerError('When running in Moblab, container list '
                                   'filter only supports name and state.')

    matches = []
    for entry in os.listdir(container_path):
        entry_path = os.path.join(container_path, entry)
        # Plain files can not be containers.
        if os.path.isfile(entry_path):
            continue
        # A folder without a rootfs subfolder is not a container either.
        if not lxc_utils.path_exists(os.path.join(entry_path, 'rootfs')):
            continue

        state = 'STOPPED'
        if entry in running:
            state = 'RUNNING'

        if wanted_name and wanted_name != entry:
            continue
        if wanted_state and wanted_state != state:
            continue

        matches.append({'name': entry, 'state': state})
    return matches
168
169
def get_container_info(container_path, **filters):
    """Get a collection of container information in the given container path.

    This method parse the output of lxc-ls to get a list of container
    information. The lxc-ls command output looks like:
    NAME      STATE    IPV4       IPV6  AUTOSTART  PID   MEMORY  RAM     SWAP
    --------------------------------------------------------------------------
    base      STOPPED  -          -     NO         -     -       -       -
    test_123  RUNNING  10.0.3.27  -     NO         8359  6.28MB  6.28MB  0.0MB

    Only the columns listed in ATTRIBUTES are requested from lxc-ls.

    When several filters are given, a container is returned only if it matches
    all of them, which is the same behavior as the Moblab code path. Each
    container appears at most once in the result.

    @param container_path: Path to look for containers.
    @param filters: Key value to filter the containers, e.g., name='base'

    @return: A list of dictionaries that each dictionary has the information of
             a container. The keys are defined in ATTRIBUTES.
    """
    if IS_MOBLAB:
        return _get_container_info_moblab(container_path, **filters)

    cmd = 'sudo lxc-ls -P %s -f -F %s' % (os.path.realpath(container_path),
                                          ','.join(ATTRIBUTES))
    output = utils.run(cmd).stdout
    info_collection = []

    # The first line of the output is the column header, skip it.
    for line in output.splitlines()[1:]:
        # Blank lines carry no container. Only LXC 1.x has the second line of
        # '-' as a separator.
        if not line.strip() or line.startswith('------'):
            continue
        info_collection.append(dict(zip(ATTRIBUTES, line.split())))

    if filters:
        # Keep a container only when every filter matches. Appending once per
        # matching filter would OR the filters together and duplicate entries.
        info_collection = [
                info for info in info_collection
                if all(key in info and info[key] == value
                       for key, value in filters.items())]
    return info_collection
207
208
209def cleanup_if_fail():
210    """Decorator to do cleanup if container fails to be set up.
211    """
212    def deco_cleanup_if_fail(func):
213        """Wrapper for the decorator.
214
215        @param func: Function to be called.
216        """
217        def func_cleanup_if_fail(*args, **kwargs):
218            """Decorator to do cleanup if container fails to be set up.
219
220            The first argument must be a ContainerBucket object, which can be
221            used to retrieve the container object by name.
222
223            @param func: function to be called.
224            @param args: arguments for function to be called.
225            @param kwargs: keyword arguments for function to be called.
226            """
227            bucket = args[0]
228            name = utils.get_function_arg_value(func, 'name', args, kwargs)
229            try:
230                skip_cleanup = utils.get_function_arg_value(
231                        func, 'skip_cleanup', args, kwargs)
232            except (KeyError, ValueError):
233                skip_cleanup = False
234            try:
235                return func(*args, **kwargs)
236            except:
237                exc_info = sys.exc_info()
238                try:
239                    container = bucket.get(name)
240                    if container and not skip_cleanup:
241                        container.destroy()
242                except error.CmdError as e:
243                    logging.error(e)
244
245                try:
246                    job_id = utils.get_function_arg_value(
247                            func, 'job_id', args, kwargs)
248                except (KeyError, ValueError):
249                    job_id = ''
250                metadata={'drone': socket.gethostname(),
251                          'job_id': job_id,
252                          'success': False}
253                # Record all args if job_id is not available.
254                if not job_id:
255                    metadata['args'] = str(args)
256                    if kwargs:
257                        metadata.update(kwargs)
258                autotest_es.post(use_http=True,
259                                 type_str=CONTAINER_CREATE_METADB_TYPE,
260                                 metadata=metadata)
261
262                # Raise the cached exception with original backtrace.
263                raise exc_info[0], exc_info[1], exc_info[2]
264        return func_cleanup_if_fail
265    return deco_cleanup_if_fail
266
267
@retry.retry(error.CmdError, timeout_min=5)
def download_extract(url, target, extract_dir):
    """Fetch the file at the given url, store it as target and extract it.

    The file is downloaded through the devserver when the url points to one of
    the known image servers; otherwise it is downloaded with wget. The
    downloaded file is then extracted with tar.

    @param url: Url to download the file.
    @param target: Path of the file to save to.
    @param extract_dir: Directory to extract the content of the file to.
    """
    server_url = dev_server.DevServer.get_server_url(url)
    # TODO(xixuan): Better to only ssh to devservers in lab, and continue using
    # wget for ganeti devservers.
    use_wget = server_url not in dev_server.ImageServerBase.servers()
    if use_wget:
        wget_cmd = 'sudo wget --timeout=300 -nv %s -O %s' % (url, target)
        utils.run(wget_cmd, stderr_tee=utils.TEE_TO_LOGS)
    else:
        # Download to /tmp first, then move the file to the target with sudo.
        staging_path = '/tmp/%s' % os.path.basename(target)
        dev_server.ImageServerBase.download_file(url, staging_path,
                                                 timeout=300)
        utils.run('sudo mv %s %s' % (staging_path, target))

    extract_cmd = 'sudo tar -xvf %s -C %s' % (target, extract_dir)
    utils.run(extract_cmd)
288
289
def install_package_precheck(packages):
    """Decide whether package installation can go ahead.

    Installation is skipped when SSP is not enabled and the test is not in a
    container, or when the test is running in chroot (using test_that).

    Skipping does not raise an exception, so tests started by test_that or
    running in an Autotest setup with SSP disabled can continue. That assumes
    the running environment, chroot or a machine, already has the desired
    packages installed.

    @param packages: A list of names of the packages to install. Only used in
                     log messages.

    @return: True if package installation can continue. False if it should be
             skipped.

    @raise error.ContainerError: If SSP is enabled and the test runs outside
                                 both chroot and container.
    """
    if not (SSP_ENABLED or utils.is_in_container()):
        logging.info('Server-side packaging is not enabled. Install package %s '
                     'is skipped.', packages)
        return False

    running_in_chroot = server_utils.is_inside_chroot()
    if running_in_chroot:
        logging.info('Test is running inside chroot. Install package %s is '
                     'skipped.', packages)
        return False

    if utils.is_in_container():
        return True

    raise error.ContainerError('Package installation is only supported '
                               'when test is running inside container.')
320
321
@metrics.SecondsTimerDecorator('%s/install_packages_duration' % STATS_KEY)
@retry.retry(error.CmdError, timeout_min=30)
def install_packages(packages=None, python_packages=None, force_latest=False):
    """Install the given package inside container.

    !!! WARNING !!!
    This call may introduce several minutes of delay in test run. The best way
    to avoid such delay is to update the base container used for the test run.
    File a bug for infra deputy to update the base container with the new
    package a test requires.

    The lists passed in are never modified.

    @param packages: A list of names of the packages to install. None is
                     treated as an empty list.
    @param python_packages: A list of names of the python packages to install
                            using pip. None is treated as an empty list.
    @param force_latest: True to force to install the latest version of the
                         package. Default to False, which means skip installing
                         the package if it's installed already, even with an old
                         version.

    @raise error.ContainerError: If package is attempted to be installed outside
                                 a container.
    @raise error.CmdError: If the package doesn't exist or failed to install.

    """
    # Work on private copies. The package list is extended below, which must
    # not leak into the caller's list nor pile up when the call is retried.
    packages = list(packages or [])
    python_packages = list(python_packages or [])

    if not install_package_precheck(packages or python_packages):
        return

    # If force_latest is False, only install packages that are not already
    # installed.
    if not force_latest:
        packages = [p for p in packages if not utils.is_package_installed(p)]
        python_packages = [p for p in python_packages
                           if not utils.is_python_package_installed(p)]
        if not packages and not python_packages:
            logging.debug('All packages are installed already, skip reinstall.')
            return

    # Always run apt-get update before installing any package. The base
    # container may have outdated cache.
    utils.run('sudo apt-get update')
    # pip and the python headers are needed to install python packages.
    if python_packages:
        packages.extend(['python-pip', 'python-dev'])
    if packages:
        utils.run('sudo apt-get install %s -y --force-yes' % ' '.join(packages))
        logging.debug('Packages are installed: %s.', packages)

    if python_packages:
        target_setting = ''
        # For containers running in Moblab,
        # /usr/local/lib/python2.7/dist-packages/ is a readonly mount from the
        # host. Therefore, new python modules have to be installed in
        # /usr/lib/python2.7/dist-packages/
        # Containers created in Moblab does not have autotest/site-packages
        # folder.
        if not os.path.exists('/usr/local/autotest/site-packages'):
            target_setting = '--target="/usr/lib/python2.7/dist-packages/"'
        utils.run('sudo pip install %s %s' % (target_setting,
                                              ' '.join(python_packages)))
        logging.debug('Python packages are installed: %s.', python_packages)
381
382
@retry.retry(error.CmdError, timeout_min=20)
def install_package(package):
    """Install the given package inside container.

    This function is kept for backwards compatibility reason. New code should
    use function install_packages for better performance.

    @param package: Name of the package to install.

    @raise error.ContainerError: If package is attempted to be installed outside
                                 a container.
    @raise error.CmdError: If the package doesn't exist or failed to install.

    """
    # logging.warn is a deprecated alias of logging.warning.
    logging.warning('This function is obsoleted, please use install_packages '
                    'instead.')
    install_packages(packages=[package])
400
401
@retry.retry(error.CmdError, timeout_min=20)
def install_python_package(package):
    """Install the given python package inside container using pip.

    This function is kept for backwards compatibility reason. New code should
    use function install_packages for better performance.

    @param package: Name of the python package to install.

    @raise error.ContainerError: If package is attempted to be installed outside
                                 a container.
    @raise error.CmdError: If the package doesn't exist or failed to install.
    """
    # logging.warn is a deprecated alias of logging.warning.
    logging.warning('This function is obsoleted, please use install_packages '
                    'instead.')
    install_packages(python_packages=[package])
416
417
class Container(object):
    """A wrapper class of an LXC container.

    The wrapper class provides methods to interact with a container, e.g.,
    start, stop, destroy, run a command. It also has attributes of the
    container, including:
    name: Name of the container.
    state: State of the container, e.g., ABORTING, RUNNING, STARTING, STOPPED,
           or STOPPING.

    lxc-ls can also collect other attributes of a container including:
    ipv4: IP address for IPv4.
    ipv6: IP address for IPv6.
    autostart: If the container will autostart at system boot.
    pid: Process ID of the container.
    memory: Memory used by the container, as a string, e.g., "6.2MB"
    ram: Physical ram used by the container, as a string, e.g., "6.2MB"
    swap: swap used by the container, as a string, e.g., "1.0MB"

    For performance reason, such info is not collected for now.

    The attributes available are defined in ATTRIBUTES constant.
    """

    def __init__(self, container_path, attribute_values):
        """Initialize an object of LXC container with given attribute values.

        @param container_path: Directory that stores the container.
        @param attribute_values: A dictionary of attribute values for the
                                 container. Each key becomes an attribute of
                                 the object.
        """
        self.container_path = os.path.realpath(container_path)
        # Path to the rootfs of the container. This will be initialized when
        # property rootfs is retrieved.
        self._rootfs = None
        for attribute, value in attribute_values.items():
            setattr(self, attribute, value)


    def refresh_status(self):
        """Refresh the status information of the container.

        @raise error.ContainerError: If no container with the name of this
                                     object is found in the container path.
        """
        containers = get_container_info(self.container_path, name=self.name)
        if not containers:
            # Both values must be passed as one tuple to the format string.
            raise error.ContainerError(
                    'No container found in directory %s with name of %s.' %
                    (self.container_path, self.name))
        attribute_values = containers[0]
        for attribute, value in attribute_values.items():
            setattr(self, attribute, value)


    @property
    def rootfs(self):
        """Path to the rootfs of the container.

        This property returns the path to the rootfs of the container, that is,
        the folder where the container stores its local files. It reads the
        attribute lxc.rootfs from the config file of the container, e.g.,
            lxc.rootfs = /usr/local/autotest/containers/t4/rootfs
        If the container is created with snapshot, the rootfs is a chain of
        folders, separated by `:` and ordered by how the snapshot is created,
        e.g.,
            lxc.rootfs = overlayfs:/usr/local/autotest/containers/base/rootfs:
            /usr/local/autotest/containers/t4_s/delta0
        This function returns the last folder in the chain, in above example,
        that is `/usr/local/autotest/containers/t4_s/delta0`

        Files in the rootfs will be accessible directly within container. For
        example, a folder in host "[rootfs]/usr/local/file1", can be accessed
        inside container by path "/usr/local/file1". Note that symlink in the
        host can not across host/container boundary, instead, directory mount
        should be used, refer to function mount_dir.

        The value is looked up once and cached. The lookup also sets the
        attribute clone_from_snapshot, which is True if lxc.rootfs is a chain
        of folders.

        @return: Path to the rootfs of the container.

        @raise error.ContainerError: If lxc.rootfs can not be found in the
                                     output of lxc-info.
        """
        if not self._rootfs:
            cmd = ('sudo lxc-info -P %s -n %s -c lxc.rootfs' %
                   (self.container_path, self.name))
            lxc_rootfs_config = utils.run(cmd).stdout.strip()
            match = re.match('lxc.rootfs = (.*)', lxc_rootfs_config)
            if not match:
                raise error.ContainerError(
                        'Failed to locate rootfs for container %s. lxc.rootfs '
                        'in the container config file is %s' %
                        (self.name, lxc_rootfs_config))
            lxc_rootfs = match.group(1)
            self.clone_from_snapshot = ':' in lxc_rootfs
            if self.clone_from_snapshot:
                # The last folder in the chain is the one of this container.
                self._rootfs = lxc_rootfs.split(':')[-1]
            else:
                self._rootfs = lxc_rootfs
        return self._rootfs


    def attach_run(self, command, bash=True):
        """Attach to a given container and run the given command.

        @param command: Command to run in the container.
        @param bash: Run the command through bash -c "command". This allows
                     pipes to be used in command. Default is set to True.
                     A command that already starts with `bash -c` is not
                     wrapped again.

        @return: The result object returned by utils.run. The output of the
                 command is available in its stdout attribute.

        @raise error.CmdError: If container does not exist, or not running.
        """
        cmd = 'sudo lxc-attach -P %s -n %s' % (self.container_path, self.name)
        if bash and not command.startswith('bash -c'):
            command = 'bash -c "%s"' % utils.sh_escape(command)
        cmd += ' -- %s' % command
        # TODO(dshi): crbug.com/459344 Set sudo to default to False when test
        # container can be unprivileged container.
        return utils.run(cmd)


    def is_network_up(self):
        """Check if network is up in the container by curl base container url.

        @return: True if the network is up, otherwise False.
        """
        try:
            self.attach_run('curl --head %s' % CONTAINER_BASE_URL)
            return True
        except error.CmdError as e:
            logging.debug(e)
            return False


    @metrics.SecondsTimerDecorator('%s/container_start_duration' % STATS_KEY)
    def start(self, wait_for_network=True):
        """Start the container.

        @param wait_for_network: True to wait for network to be up. Default is
                                 set to True.

        @raise ContainerError: If container does not exist, or fails to start.
        """
        cmd = 'sudo lxc-start -P %s -n %s -d' % (self.container_path, self.name)
        output = utils.run(cmd).stdout
        self.refresh_status()
        if self.state != 'RUNNING':
            raise error.ContainerError(
                    'Container %s failed to start. lxc command output:\n%s' %
                    (os.path.join(self.container_path, self.name),
                     output))

        if wait_for_network:
            logging.debug('Wait for network to be up.')
            start_time = time.time()
            utils.poll_for_condition(condition=self.is_network_up,
                                     timeout=NETWORK_INIT_TIMEOUT,
                                     sleep_interval=NETWORK_INIT_CHECK_INTERVAL)
            logging.debug('Network is up after %.2f seconds.',
                          time.time() - start_time)


    @metrics.SecondsTimerDecorator('%s/container_stop_duration' % STATS_KEY)
    def stop(self):
        """Stop the container.

        @raise ContainerError: If container does not exist, or fails to stop.
        """
        cmd = 'sudo lxc-stop -P %s -n %s' % (self.container_path, self.name)
        output = utils.run(cmd).stdout
        self.refresh_status()
        if self.state != 'STOPPED':
            raise error.ContainerError(
                    'Container %s failed to be stopped. lxc command output:\n'
                    '%s' % (os.path.join(self.container_path, self.name),
                            output))


    @metrics.SecondsTimerDecorator('%s/container_destroy_duration' % STATS_KEY)
    def destroy(self, force=True):
        """Destroy the container.

        @param force: Set to True to force to destroy the container even if it's
                      running. This is faster than stop a container first then
                      try to destroy it. Default is set to True.

        @raise error.CmdError: If the lxc-destroy command fails, e.g., the
                               container does not exist.
        """
        cmd = 'sudo lxc-destroy -P %s -n %s' % (self.container_path,
                                                self.name)
        if force:
            cmd += ' -f'
        utils.run(cmd)


    def mount_dir(self, source, destination, readonly=False):
        """Mount a directory in host to a directory in the container.

        The mount is added as an entry in the config file of the container.

        @param source: Directory in host to be mounted.
        @param destination: Directory in container to mount the source directory
        @param readonly: Set to True to make a readonly mount, default is False.
        """
        # Destination path in container must be relative.
        destination = destination.lstrip('/')
        # Create directory in container for mount.
        utils.run('sudo mkdir -p %s' % os.path.join(self.rootfs, destination))
        config_file = os.path.join(self.container_path, self.name, 'config')
        mount = MOUNT_FMT % {'source': source,
                             'destination': destination,
                             'readonly': ',ro' if readonly else ''}
        utils.run(APPEND_CMD_FMT % {'content': mount, 'file': config_file})


    def verify_autotest_setup(self, job_folder):
        """Verify autotest code is set up properly in the container.

        @param job_folder: Name of the job result folder.

        @raise ContainerError: If autotest code is not set up properly.
        """
        # Test autotest code is setup by verifying a list of
        # (directory, minimum file count)
        if IS_MOBLAB:
            site_packages_path = MOBLAB_SITE_PACKAGES_CONTAINER
        else:
            site_packages_path = os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR,
                                              'site-packages')
        directories_to_check = [
                (lxc_config.CONTAINER_AUTOTEST_DIR, 3),
                (RESULT_DIR_FMT % job_folder, 0),
                (site_packages_path, 3)]
        for directory, count in directories_to_check:
            result = self.attach_run(command=(COUNT_FILE_CMD %
                                              {'dir': directory})).stdout
            # Parse the count once, it is used for both logging and the check.
            entry_count = int(result)
            logging.debug('%s entries in %s.', entry_count, directory)
            if entry_count < count:
                raise error.ContainerError('%s is not properly set up.' %
                                           directory)
        # lxc-attach and run command does not run in shell, thus .bashrc is not
        # loaded. Following command creates a symlink in /usr/bin/ for gsutil
        # if it's installed.
        # TODO(dshi): Remove this code after lab container is updated with
        # gsutil installed in /usr/bin/
        self.attach_run('test -f /root/gsutil/gsutil && '
                        'ln -s /root/gsutil/gsutil /usr/bin/gsutil || true')


    def modify_import_order(self):
        """Swap the python import order of lib and local/lib.

        In Moblab, the host's python modules located in
        /usr/lib64/python2.7/site-packages is mounted to following folder inside
        container: /usr/local/lib/python2.7/dist-packages/. The modules include
        an old version of requests module, which is used in autotest
        site-packages. For test, the module is only used in
        dev_server/symbolicate_dump for requests.call and requests.codes.OK.
        When pip is installed inside the container, it installs requests module
        with version of 2.2.1 in /usr/lib/python2.7/dist-packages/. The version
        is newer than the one used in autotest site-packages, but not the latest
        either.
        According to /usr/lib/python2.7/site.py, modules in /usr/local/lib are
        imported before the ones in /usr/lib. That leads to pip to use the older
        version of requests (0.11.2), and it will fail. On the other hand,
        requests module 2.2.1 can't be installed in CrOS (refer to CL:265759),
        and higher version of requests module can't work with pip.
        The only fix to resolve this is to switch the import order, so modules
        in /usr/lib can be imported before /usr/local/lib.
        """
        site_module = '/usr/lib/python2.7/site.py'
        # The swap is done in three steps with a placeholder, so the two
        # replacements do not undo each other.
        self.attach_run("sed -i ':a;N;$!ba;s/\"local\/lib\",\\n/"
                        "\"lib_placeholder\",\\n/g' %s" % site_module)
        self.attach_run("sed -i ':a;N;$!ba;s/\"lib\",\\n/"
                        "\"local\/lib\",\\n/g' %s" % site_module)
        self.attach_run('sed -i "s/lib_placeholder/lib/g" %s' %
                        site_module)
689
690
class ContainerBucket(object):
    """A wrapper class to interact with containers in a specific container path.

    All methods operate on the containers stored under `container_path`.
    """

    def __init__(self, container_path=DEFAULT_CONTAINER_PATH):
        """Initialize a ContainerBucket.

        @param container_path: Path to the directory used to store containers.
                               Default is set to AUTOSERV/container_path in
                               global config.
        """
        # Store the canonical path (symlinks resolved); every other path in
        # this class is built from it.
        self.container_path = os.path.realpath(container_path)


    def get_all(self):
        """Get details of all containers.

        @return: A dictionary of all containers with detailed attributes,
                 indexed by container name.
        """
        containers = {}
        for info in get_container_info(self.container_path):
            container = Container(self.container_path, info)
            containers[container.name] = container
        return containers


    def get(self, name):
        """Get a container with matching name.

        Note that every call collects the information of all containers again
        through get_all().

        @param name: Name of the container.

        @return: A container object with matching name. Returns None if no
                 container matches the given name.
        """
        return self.get_all().get(name)


    def exist(self, name):
        """Check if a container exists with the given name.

        @param name: Name of the container.

        @return: True if the container with the given name exists, otherwise
                 returns False.
        """
        return self.get(name) is not None


    def destroy_all(self):
        """Destroy all containers, base must be destroyed at the last.
        """
        containers = self.get_all().values()
        # The sort key is 1 only for the base container, so it is moved to the
        # end while the other containers keep their relative order.
        for container in sorted(containers,
                                key=lambda c: 1 if c.name == BASE else 0):
            logging.info('Destroy container %s.', container.name)
            container.destroy()


    @metrics.SecondsTimerDecorator('%s/create_from_base_duration' % STATS_KEY)
    def create_from_base(self, name, disable_snapshot_clone=False,
                         force_cleanup=False):
        """Create a container from the base container.

        If a snapshot clone fails, the clone is retried once without snapshot
        and the successful retry is reported to the metadata db.

        @param name: Name of the container.
        @param disable_snapshot_clone: Set to True to force to clone without
                using snapshot clone even if the host supports that.
        @param force_cleanup: Force to cleanup existing container.

        @return: A Container object for the created container.

        @raise ContainerError: If the container already exist.
        @raise error.CmdError: If lxc-clone call failed for any reason.
        """
        if self.exist(name) and not force_cleanup:
            raise error.ContainerError('Container %s already exists.' % name)

        # Cleanup existing container with the given name.
        container_folder = os.path.join(self.container_path, name)
        if force_cleanup and lxc_utils.path_exists(container_folder):
            container = Container(self.container_path, {'name': name})
            try:
                container.destroy()
            except error.CmdError as e:
                # The container could be created in an incomplete state.
                # Delete the container folder instead.
                logging.warning('Failed to destroy container %s, error: %s',
                                name, e)
                utils.run('sudo rm -rf "%s"' % container_folder)

        use_snapshot = SUPPORT_SNAPSHOT_CLONE and not disable_snapshot_clone
        snapshot = '-s' if use_snapshot else ''
        # overlayfs is the default clone backend storage. However it is not
        # supported in Ganeti yet. Use aufs as the alternative.
        aufs = '-B aufs' if utils.is_vm() and use_snapshot else ''
        cmd = ('sudo lxc-clone -p %s -P %s %s' %
               (self.container_path, self.container_path,
                ' '.join([BASE, name, snapshot, aufs])))
        try:
            utils.run(cmd)
            return self.get(name)
        except error.CmdError:
            if not use_snapshot:
                raise
            # Snapshot clone failed, retry clone without snapshot. The retry
            # won't hit the code here and cause an infinite loop as
            # disable_snapshot_clone is set to True.
            container = self.create_from_base(
                    name, disable_snapshot_clone=True, force_cleanup=True)
            # Report metadata about retry success.
            autotest_es.post(use_http=True,
                             type_str=CONTAINER_CREATE_RETRY_METADB_TYPE,
                             metadata={'drone': socket.gethostname(),
                                       'name': name,
                                       'success': True})
            return container


    @cleanup_if_fail()
    def setup_base(self, name=BASE, force_delete=False):
        """Setup base container.

        Downloads the base container tarball, extracts it into the container
        path, makes root the owner and group of the extracted folder, and
        replaces the `container_dir` placeholder in the container config with
        the container path.

        If the base container already exists and force_delete is False, only
        the owner and group of the existing base container folder are updated.

        @param name: Name of the base container, default to base.
        @param force_delete: True to force to delete existing base container.
                             This action will destroy all running test
                             containers. Default is set to False.

        @raise ContainerError: If the container path is not set.
        """
        if not self.container_path:
            raise error.ContainerError(
                    'You must set a valid directory to store containers in '
                    'global config "AUTOSERV/ container_path".')

        if not os.path.exists(self.container_path):
            os.makedirs(self.container_path)

        base_path = os.path.join(self.container_path, name)
        # Check only once; each call of exist() collects the information of
        # all containers again.
        base_exists = self.exist(name)
        if base_exists and not force_delete:
            logging.error(
                    'Base container already exists. Set force_delete to True '
                    'to force to re-stage base container. Note that this '
                    'action will destroy all running test containers')
            # Set proper file permission. base container in moblab may have
            # owner of not being root. Force to update the folder's owner.
            # TODO(dshi): Change root to current user when test container can be
            # unprivileged container.
            utils.run('sudo chown -R root "%s"' % base_path)
            utils.run('sudo chgrp -R root "%s"' % base_path)
            return

        # Destroy existing base container if exists.
        if base_exists:
            # TODO: We may need to destroy all snapshots created from this base
            # container, not all container.
            self.destroy_all()

        # Download and untar the base container.
        tar_path = os.path.join(self.container_path, '%s.tar.xz' % name)
        # Remove leftovers of a previous setup before downloading again.
        for path in (tar_path, base_path):
            if os.path.exists(path):
                utils.run('sudo rm -rf "%s"' % path)
        container_url = CONTAINER_BASE_URL_FMT % name
        download_extract(container_url, tar_path, self.container_path)
        # Remove the downloaded container tar file.
        utils.run('sudo rm "%s"' % tar_path)
        # Set proper file permission.
        # TODO(dshi): Change root to current user when test container can be
        # unprivileged container.
        utils.run('sudo chown -R root "%s"' % base_path)
        utils.run('sudo chgrp -R root "%s"' % base_path)

        # Update container config with container_path from global config.
        config_path = os.path.join(base_path, 'config')
        utils.run('sudo sed -i "s|container_dir|%s|g" "%s"' %
                  (self.container_path, config_path))


    @metrics.SecondsTimerDecorator('%s/setup_test_duration' % STATS_KEY)
    @cleanup_if_fail()
    def setup_test(self, name, job_id, server_package_url, result_path,
                   control=None, skip_cleanup=False, job_folder=None,
                   dut_name=None):
        """Setup test container for the test job to run.

        The setup includes:
        1. Install autotest_server package from given url.
        2. Copy over local shadow_config.ini.
        3. Mount local site-packages.
        4. Mount test result directory.

        TODO(dshi): Setup also needs to include test control file for autoserv
                    to run in container.

        @param name: Name of the container.
        @param job_id: Job id for the test job to run in the test container.
        @param server_package_url: Url to download autotest_server package.
        @param result_path: Directory to be mounted to container to store test
                            results.
        @param control: Path to the control file to run the test job. Default is
                        set to None.
        @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot
                             container failures. Not read in this method;
                             presumably consumed by the cleanup_if_fail
                             decorator (TODO confirm).
        @param job_folder: Folder name of the job, e.g., 123-debug_user.
        @param dut_name: Name of the dut to run test, used as the hostname of
                         the container. Default is None.
        @return: A Container object for the test container.

        @raise ContainerError: If the result directory does not exist, or if
                               a container with the given name already exists.
        """
        start_time = time.time()

        if not os.path.exists(result_path):
            raise error.ContainerError(
                    'Result directory does not exist: %s' % result_path)
        result_path = os.path.abspath(result_path)

        # Save control file to result_path temporarily. The reason is that the
        # control file in drone_tmp folder can be deleted during scheduler
        # restart. For test not using SSP, the window between test starts and
        # control file being picked up by the test is very small (< 2 seconds).
        # However, for tests using SSP, it takes around 1 minute before the
        # container is setup. If scheduler is restarted during that period, the
        # control file will be deleted, and the test will fail.
        if control:
            control_file_name = os.path.basename(control)
            safe_control = os.path.join(result_path, control_file_name)
            utils.run('cp "%s" "%s"' % (control, safe_control))

        # Create test container from the base container.
        container = self.create_from_base(name)

        # Update the hostname of the test container to be `dut_name`.
        # Some TradeFed tests use hostname in test results, which is used to
        # group test results in dashboard. The default container name is set to
        # be the name of the folder, which is unique (as it is composed of job
        # id and timestamp). For better result view, the container's hostname
        # is set to be a string containing the dut hostname.
        if dut_name:
            config_file = os.path.join(container.container_path, name, 'config')
            lxc_utsname_setting = (
                    'lxc.utsname = ' +
                    CONTAINER_UTSNAME_FORMAT % dut_name.replace('.', '_'))
            utils.run(APPEND_CMD_FMT % {'content': lxc_utsname_setting,
                                        'file': config_file})

        # Deploy server side package
        usr_local_path = os.path.join(container.rootfs, 'usr', 'local')
        autotest_pkg_path = os.path.join(usr_local_path,
                                         'autotest_server_package.tar.bz2')
        autotest_path = os.path.join(usr_local_path, 'autotest')
        # sudo is required so os.makedirs may not work.
        utils.run('sudo mkdir -p "%s"' % usr_local_path)

        download_extract(server_package_url, autotest_pkg_path, usr_local_path)
        deploy_config_manager = lxc_config.DeployConfigManager(container)
        deploy_config_manager.deploy_pre_start()

        # Copy over control file to run the test job.
        if control:
            container_drone_temp = os.path.join(autotest_path, 'drone_tmp')
            utils.run('sudo mkdir -p "%s"' % container_drone_temp)
            container_control_file = os.path.join(
                    container_drone_temp, control_file_name)
            # Move the control file stored in the result folder to container.
            utils.run('sudo mv "%s" "%s"' %
                      (safe_control, container_control_file))

        if IS_MOBLAB:
            site_packages_path = MOBLAB_SITE_PACKAGES
            # Drop the first character of the container side path.
            site_packages_container_path = MOBLAB_SITE_PACKAGES_CONTAINER[1:]
        else:
            site_packages_path = os.path.join(common.autotest_dir,
                                              'site-packages')
            site_packages_container_path = os.path.join(
                    lxc_config.CONTAINER_AUTOTEST_DIR, 'site-packages')
        # Each entry is (source, destination, readonly).
        mount_entries = [(site_packages_path, site_packages_container_path,
                          True),
                         (os.path.join(common.autotest_dir, 'puppylab'),
                          os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR,
                                       'puppylab'),
                          True),
                         (result_path,
                          RESULT_DIR_FMT % job_folder,
                          False),
                        ]
        for mount_config in deploy_config_manager.mount_configs:
            mount_entries.append((mount_config.source, mount_config.target,
                                  mount_config.readonly))
        # Update container config to mount directories.
        for source, destination, readonly in mount_entries:
            container.mount_dir(source, destination, readonly)

        # Update file permissions.
        # TODO(dshi): crbug.com/459344 Skip following action when test container
        # can be unprivileged container.
        utils.run('sudo chown -R root "%s"' % autotest_path)
        utils.run('sudo chgrp -R root "%s"' % autotest_path)

        # NOTE(review): `name` is passed positionally to start(); confirm that
        # this is the argument start() expects.
        container.start(name)
        deploy_config_manager.deploy_post_start()

        container.modify_import_order()

        container.verify_autotest_setup(job_folder)

        # Report how long the setup took.
        autotest_es.post(use_http=True,
                         type_str=CONTAINER_CREATE_METADB_TYPE,
                         metadata={'drone': socket.gethostname(),
                                   'job_id': job_id,
                                   'time_used': time.time() - start_time,
                                   'success': True})

        logging.debug('Test container %s is set up.', name)
        return container
1006
1007
def parse_options(argv=None):
    """Parse command line inputs.

    @param argv: List of command line arguments to parse. Default is None,
                 in which case argparse reads the arguments from sys.argv.

    @return: The parsed options, with attributes setup, path, force_delete
             and name.

    @raise argparse.ArgumentError: If neither --setup nor --force_delete is
                                   given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--setup', action='store_true',
                        default=False,
                        help='Set up base container.')
    parser.add_argument('-p', '--path', type=str,
                        help='Directory to store the container.',
                        default=DEFAULT_CONTAINER_PATH)
    parser.add_argument('-f', '--force_delete', action='store_true',
                        default=False,
                        help=('Force to delete existing containers and rebuild '
                              'base containers.'))
    parser.add_argument('-n', '--name', type=str,
                        help='Name of the base container.',
                        default=BASE)
    options = parser.parse_args(argv)
    if not options.setup and not options.force_delete:
        # ArgumentError takes (argument, message). The error is not tied to a
        # single argument, so None is passed as the argument; passing only the
        # message fails with a TypeError before the error can be raised.
        raise argparse.ArgumentError(
                None,
                'Use --setup to setup a base container, or --force_delete to '
                'delete all containers in given path.')
    return options
1033
1034
def main():
    """main script.

    Makes sure sudo can be used, parses the command line options, then either
    sets up the base container (--setup) or destroys all containers in the
    given path (--force_delete without --setup).
    """
    # Force to run the setup as superuser.
    # TODO(dshi): crbug.com/459344 Remove this enforcement when test
    # container can be unprivileged container.
    if utils.sudo_require_password():
        logging.warning('SSP requires root privilege to run commands, please '
                        'grant root access to this process.')
        # Run a no-op command through sudo up front; presumably this is where
        # the user gets asked for the password (TODO confirm).
        utils.run('sudo true')

    options = parse_options()
    bucket = ContainerBucket(container_path=options.path)
    if options.setup:
        # force_delete is passed on, so `--setup --force_delete` re-stages an
        # existing base container.
        bucket.setup_base(name=options.name, force_delete=options.force_delete)
    elif options.force_delete:
        bucket.destroy_all()
1051
1052
# Run the command line tool only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
    main()
1055