1# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import contextlib
6import datetime
7import logging
8import pprint
9import time
10
11import common
12from autotest_lib.client.common_lib import error, site_utils
13from autotest_lib.client.common_lib import utils as base_utils
14from autotest_lib.client.common_lib.cros.network import ap_constants
15from autotest_lib.client.common_lib.cros.network import iw_runner
16from autotest_lib.server import hosts
17from autotest_lib.server import site_linux_system
18from autotest_lib.server.cros import host_lock_manager
19from autotest_lib.server.cros.ap_configurators import ap_batch_locker
20from autotest_lib.server.cros.ap_configurators \
21        import ap_configurator_factory
22from autotest_lib.server.cros.network import chaos_clique_utils as utils
23from autotest_lib.server.cros.network import wifi_client
24from autotest_lib.server.hosts import adb_host
25
# Webdriver master hostname. ChaosRunner.run opens an SSH host to it as user
# 'chaosvmmaster' and passes it to the VM power on/off/is-running helpers.
MASTERNAME = 'chromeos3-chaosvmmaster.cros.corp.google.com'
# Port used for both ends of the SSH tunnel to the webdriver VM; after
# creating the tunnel ChaosRunner.run checks this port locally with lsof.
WEBDRIVER_PORT = 9515
29
30
class ChaosRunner(object):
    """Object to run a network_WiFi_ChaosXXX test.

    Construct it with the test name, the DUT host and an APSpec, then call
    run() to iterate over batches of APs and run the test against each one.
    """


    def __init__(self, test, host, spec, broken_pdus=None):
        """Stores the test parameters and logs the server and DUT times.

        @param test: a string, test name.
        @param host: an Autotest host object, device under test.
        @param spec: an APSpec object.
        @param broken_pdus: list of offline PDUs, or None to start with a
                            new empty list. A list passed by the caller is
                            stored as-is (not copied).

        """
        self._test = test
        self._host = host
        self._ap_spec = spec
        # Build the empty list here rather than in the signature: a default
        # list in the signature is created once and would be shared by every
        # ChaosRunner constructed without broken_pdus.
        self._broken_pdus = [] if broken_pdus is None else broken_pdus
        # Log server and DUT times
        dt = datetime.datetime.now()
        logging.info('Server time: %s', dt.strftime('%a %b %d %H:%M:%S %Y'))
        logging.info('DUT time: %s', self._host.run('date').stdout.strip())


    def run(self, job, batch_size=10, tries=10, capturer_hostname=None,
            conn_worker=None, work_client_hostname=None,
            disabled_sysinfo=False):
        """Executes Chaos test.

        Allocates a packet capturer and a webdriver VM, then repeatedly takes
        a batch of APs from the batch locker, powers them down, configures
        them, and runs self._test through job.run_test for every AP that the
        capturer can see. Afterwards the tunnel, capturer and VM are shut
        down and all routers are turned off.

        @param job: an Autotest job object.
        @param batch_size: an integer, max number of APs to lock in one batch.
        @param tries: an integer, number of iterations to run per AP.
        @param capturer_hostname: a string or None, hostname or IP of capturer.
        @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                            work after successful connection.
        @param work_client_hostname: a string or None, hostname of work client
        @param disabled_sysinfo: a bool, disable collection of logs from DUT.


        @raises TestError: if the packet capturer returns no scan results
                within ap_constants.MAX_SCAN_TIMEOUT, if the initial scan
                sees ap_constants.MAX_SSID_COUNT or more networks, or if the
                lsof check on WEBDRIVER_PORT stays falsy after the SSH tunnel
                is created.
        """

        lock_manager = host_lock_manager.HostLockManager()
        webdriver_master = hosts.SSHHost(MASTERNAME, user='chaosvmmaster')
        host_prefix = self._host.hostname.split('-')[0]
        # Only assigned a real tunnel in the in-lab branch below; initialised
        # here so the cleanup check at the end is always safe.
        webdriver_tunnel = None
        with host_lock_manager.HostsLockedBy(lock_manager):
            capture_host = utils.allocate_packet_capturer(
                    lock_manager, hostname=capturer_hostname,
                    prefix=host_prefix)
            # Cleanup and reboot packet capturer before the test.
            utils.sanitize_client(capture_host)
            capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                     'packet_capturer')

            # Run iw scan and abort if more than allowed number of APs are up.
            iw_command = iw_runner.IwRunner(capture_host)
            start_time = time.time()
            logging.info('Performing a scan with a max timeout of 30 seconds.')
            capture_interface = 'wlan0'
            capturer_info = capture_host.run(
                    'cat /etc/lsb-release',
                    ignore_status=True, timeout=5).stdout
            if 'whirlwind' in capturer_info:
                # Use the dual band aux radio for scanning networks.
                capture_interface = 'wlan2'
            while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
                networks = iw_command.scan(capture_interface)
                if networks is None:
                    # The scan failed; retry until the timeout expires.
                    continue
                if len(networks) >= ap_constants.MAX_SSID_COUNT:
                    raise error.TestError(
                        'Probably someone is already running a '
                        'chaos test?!')
                break
            else:
                # The loop ran out of time without a single usable scan.
                # (An exact float comparison against the timeout would
                # practically never fire, so the check lives here instead.)
                raise error.TestError(
                    'Packet capturer is not responding to scans. Check '
                    'device and re-run test')

            if conn_worker is not None:
                work_client_machine = utils.allocate_packet_capturer(
                        lock_manager, hostname=work_client_hostname)
                conn_worker.prepare_work_client(work_client_machine)

            # Lock VM. If on, power off; always power on. Then create a tunnel.
            webdriver_instance = utils.allocate_webdriver_instance(lock_manager)

            if utils.is_VM_running(webdriver_master, webdriver_instance):
                logging.info('VM %s was on; powering off for a clean instance',
                             webdriver_instance)
                utils.power_off_VM(webdriver_master, webdriver_instance)
                logging.info('Allow VM time to gracefully shut down')
                time.sleep(5)

            logging.info('Starting up VM %s', webdriver_instance)
            utils.power_on_VM(webdriver_master, webdriver_instance)
            logging.info('Allow VM time to power on before creating a tunnel.')
            time.sleep(5)

            if not site_utils.host_is_in_lab_zone(webdriver_instance.hostname):
                self._ap_spec._webdriver_hostname = webdriver_instance.hostname
            else:
                # If in the lab then port forwarding must be done so webdriver
                # connection will be over localhost.
                self._ap_spec._webdriver_hostname = 'localhost'
                webdriver_tunnel = webdriver_instance.create_ssh_tunnel(
                                                WEBDRIVER_PORT, WEBDRIVER_PORT)
                logging.info('Wait for tunnel to be created.')
                # NOTE(review): the check below relies on the truthiness of
                # the object returned by base_utils.run, not on its exit
                # status or output -- confirm it is falsy when nothing is
                # listening on the port.
                for _ in range(3):
                    time.sleep(10)
                    results = base_utils.run('lsof -i:%s' % WEBDRIVER_PORT,
                                             ignore_status=True)
                    if results:
                        break
                if not results:
                    raise error.TestError(
                            'Unable to listen to WEBDRIVER_PORT: %s' %
                            (results,))

            batch_locker = ap_batch_locker.ApBatchLocker(
                    lock_manager, self._ap_spec,
                    ap_test_type=ap_constants.AP_TEST_TYPE_CHAOS)

            while batch_locker.has_more_aps():
                # Work around for CrOS devices only:crbug.com/358716
                # Do not reboot Android devices:b/27977927
                if self._host.get_os_type() != adb_host.OS_TYPE_ANDROID:
                    utils.sanitize_client(self._host)
                healthy_dut = True

                # A new host object (same hostname, AFE host and class as the
                # DUT) is created for each batch's WiFiClient.
                dut_host = hosts.create_host(
                        {'hostname' : self._host.hostname,
                         'afe_host' : self._host._afe_host},
                        host_class=self._host.__class__)
                with contextlib.closing(wifi_client.WiFiClient(
                        dut_host, './debug', False)) as client:

                    aps = batch_locker.get_ap_batch(batch_size=batch_size)
                    if not aps:
                        logging.info('No more APs to test.')
                        break

                    # Power down all of the APs because some can get grumpy
                    # if they are configured several times and remain on.
                    # Use the cartridge to group power downs and
                    # configurations.
                    utils.power_down_aps(aps, self._broken_pdus)
                    utils.configure_aps(aps, self._ap_spec, self._broken_pdus)

                    aps = utils.filter_quarantined_and_config_failed_aps(aps,
                            batch_locker, job, self._broken_pdus)

                    for ap in aps:
                        # http://crbug.com/306687
                        if ap.ssid is None:
                            logging.error('The SSID was not set for the AP:%s',
                                          ap)

                        healthy_dut = utils.is_dut_healthy(client, ap)

                        if not healthy_dut:
                            logging.error('DUT is not healthy, rebooting.')
                            batch_locker.unlock_and_reclaim_aps()
                            break

                        networks = utils.return_available_networks(
                                ap, capturer, job, self._ap_spec)

                        if networks is None:
                            # If scan returned no networks, iw scan failed.
                            # Reboot the packet capturer device and
                            # reconfigure the capturer.
                            batch_locker.unlock_and_reclaim_ap(ap.host_name)
                            logging.error('Packet capture is not healthy, '
                                          'rebooting.')
                            capturer.host.reboot()
                            capturer = site_linux_system.LinuxSystem(
                                           capture_host, {},'packet_capturer')
                            continue
                        if not networks:
                            # Packet capturer did not find the SSID in scan or
                            # there was a security mismatch.
                            utils.release_ap(
                                    ap, batch_locker, self._broken_pdus)
                            continue

                        assoc_params = ap.get_association_parameters()

                        if not utils.is_conn_worker_healthy(
                                conn_worker, ap, assoc_params, job):
                            utils.release_ap(
                                    ap, batch_locker, self._broken_pdus)
                            continue

                        kernel_ver = self._host.get_kernel_ver()
                        firmware_ver = utils.get_firmware_ver(self._host)
                        if not firmware_ver:
                            firmware_ver = "Unknown"

                        debug_dict = {'+++PARSE DATA+++': '+++PARSE DATA+++',
                                      'SSID': ap._ssid,
                                      'DUT': client.wifi_mac,
                                      'AP Info': ap.name,
                                      'kernel_version': kernel_ver,
                                      'wifi_firmware_version': firmware_ver}
                        debug_string = pprint.pformat(debug_dict)

                        logging.info('Waiting %d seconds for the AP dhcp '
                                     'server', ap.dhcp_delay)
                        time.sleep(ap.dhcp_delay)

                        # Results are tagged by SSID, prefixed with the
                        # connection worker's name when one is in use.
                        if conn_worker is None:
                            test_tag = ap.ssid
                        else:
                            test_tag = '%s.%s' % (conn_worker.name, ap.ssid)

                        job.run_test(self._test,
                                     capturer=capturer,
                                     capturer_frequency=networks[0].frequency,
                                     capturer_ht_type=networks[0].ht,
                                     host=self._host,
                                     assoc_params=assoc_params,
                                     client=client,
                                     tries=tries,
                                     debug_info=debug_string,
                                     # Copy all logs from the system
                                     disabled_sysinfo=disabled_sysinfo,
                                     conn_worker=conn_worker,
                                     tag=test_tag)

                        utils.release_ap(ap, batch_locker, self._broken_pdus)

                        if conn_worker is not None:
                            conn_worker.cleanup()

                    if not healthy_dut:
                        # The batch's APs were already reclaimed above; start
                        # the next batch without calling unlock_aps().
                        continue

                batch_locker.unlock_aps()

            if webdriver_tunnel:
                webdriver_instance.disconnect_ssh_tunnel(webdriver_tunnel,
                                                         WEBDRIVER_PORT)
                webdriver_instance.close()
            capturer.close()
            logging.info('Powering off VM %s', webdriver_instance)
            utils.power_off_VM(webdriver_master, webdriver_instance)
            lock_manager.unlock(webdriver_instance.hostname)

            if self._broken_pdus:
                logging.info('PDU is down!!!\nThe following PDUs are down:\n')
                pprint.pprint(self._broken_pdus)

            factory = ap_configurator_factory.APConfiguratorFactory(
                    ap_constants.AP_TEST_TYPE_CHAOS)
            factory.turn_off_all_routers(self._broken_pdus)
281