1# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import contextlib
6import datetime
7import logging
8import pprint
9import time
10
11import common
12from autotest_lib.client.common_lib import error
13from autotest_lib.client.common_lib import utils as client_utils
14from autotest_lib.client.common_lib.cros.network import ap_constants
15from autotest_lib.client.common_lib.cros.network import iw_runner
16from autotest_lib.server import hosts
17from autotest_lib.server import site_linux_system
18from autotest_lib.server.cros import host_lock_manager
19from autotest_lib.server.cros.ap_configurators import ap_batch_locker
20from autotest_lib.server.cros.ap_configurators \
21        import ap_configurator_factory
22from autotest_lib.server.cros.network import chaos_clique_utils as utils
23from autotest_lib.server.cros.network import wifi_client
24
# Hostname of the webdriver master. run() opens an SSH connection to it and
# passes that connection to the helpers that power webdriver VMs on and off.
MASTERNAME = 'chromeos3-chaosvmmaster.cros.corp.google.com'
# Port used on both ends of the SSH tunnel to the webdriver instance.
WEBDRIVER_PORT = 9515
28
29
class ChaosRunner(object):
    """Object to run a network_WiFi_ChaosXXX test.

    An instance is bound to one test name, one DUT and one APSpec. run()
    locks the supporting devices (packet capturer, webdriver VM, batches of
    APs) and runs the test against each usable AP of every batch.
    """


    def __init__(self, test, host, spec, broken_pdus=None):
        """Stores the test parameters and logs the server and DUT clocks.

        @param test: a string, test name.
        @param host: an Autotest host object, device under test.
        @param spec: an APSpec object.
        @param broken_pdus: list of offline PDUs, or None to start from a
                            fresh empty list. A caller-supplied list is
                            stored by reference, not copied.

        """
        self._test = test
        self._host = host
        self._ap_spec = spec
        # A default of list() would be evaluated once and then shared by
        # every ChaosRunner that is built without an explicit list.
        self._broken_pdus = [] if broken_pdus is None else broken_pdus
        # Log server and DUT times
        dt = datetime.datetime.now()
        logging.info('Server time: %s', dt.strftime('%a %b %d %H:%M:%S %Y'))
        logging.info('DUT time: %s', self._host.run('date').stdout.strip())


    def _check_airspace_is_clear(self, capture_host):
        """Scans from the packet capturer and aborts if the cell looks busy.

        @param capture_host: host object of the packet capturer.

        @raises TestError: no scan returned results within
                ap_constants.MAX_SCAN_TIMEOUT, or a scan found
                ap_constants.MAX_SSID_COUNT or more networks.

        """
        iw_command = iw_runner.IwRunner(capture_host)
        start_time = time.time()
        logging.info('Performing a scan with a max timeout of 30 seconds.')
        capture_interface = 'wlan0'
        capturer_info = capture_host.run('cat /etc/lsb-release',
                                         ignore_status=True, timeout=5).stdout
        if 'whirlwind' in capturer_info:
            # Use the dual band aux radio for scanning networks.
            capture_interface = 'wlan2'

        while time.time() - start_time <= ap_constants.MAX_SCAN_TIMEOUT:
            networks = iw_command.scan(capture_interface)
            if networks is None:
                # The scan failed; keep retrying until the deadline.
                continue
            if len(networks) >= ap_constants.MAX_SSID_COUNT:
                raise error.TestError(
                    'Probably someone is already running a '
                    'chaos test?!')
            return

        # Only reached when every scan inside the time budget failed. An
        # exact float comparison against the timeout would never match, so
        # the failure is reported once the loop has run out of time.
        raise error.TestError(
            'Packet capturer is not responding to scans. Check '
            'device and re-run test')


    def _restart_webdriver_vm(self, webdriver_master, webdriver_instance):
        """Powers the webdriver VM off if it is running, then powers it on.

        @param webdriver_master: SSH host object of the webdriver master.
        @param webdriver_instance: host object of the locked webdriver VM.

        """
        if utils.is_VM_running(webdriver_master, webdriver_instance):
            logging.info('VM %s was on; powering off for a clean instance',
                         webdriver_instance)
            utils.power_off_VM(webdriver_master, webdriver_instance)
            logging.info('Allow VM time to gracefully shut down')
            time.sleep(5)

        logging.info('Starting up VM %s', webdriver_instance)
        utils.power_on_VM(webdriver_master, webdriver_instance)
        logging.info('Allow VM time to power on before creating a tunnel.')
        time.sleep(30)


    def _connect_to_webdriver(self, webdriver_instance):
        """Points the AP spec at the webdriver, tunnelling if in the lab.

        @param webdriver_instance: host object of the locked webdriver VM.

        @returns the SSH tunnel object when one was created, else None.

        @raises TestError: the check for a listener on WEBDRIVER_PORT failed
                on all three attempts.

        """
        if not client_utils.host_is_in_lab_zone(webdriver_instance.hostname):
            self._ap_spec._webdriver_hostname = webdriver_instance.hostname
            return None

        # If in the lab then port forwarding must be done so webdriver
        # connection will be over localhost.
        self._ap_spec._webdriver_hostname = 'localhost'
        webdriver_tunnel = webdriver_instance.create_ssh_tunnel(
                WEBDRIVER_PORT, WEBDRIVER_PORT)
        logging.info('Wait for tunnel to be created.')
        for _ in range(3):
            time.sleep(10)
            results = client_utils.run('lsof -i:%s' % WEBDRIVER_PORT,
                                       ignore_status=True)
            # NOTE(review): this tests the truthiness of whatever
            # client_utils.run() returns, not the lsof output. Confirm that
            # the returned object can actually be falsy; if not, inspect its
            # stdout or exit status instead.
            if results:
                break
        if not results:
            raise error.TestError(
                    'Unable to listen to WEBDRIVER_PORT: %s' % results)
        return webdriver_tunnel


    def _format_debug_info(self, client, ap):
        """Builds the pretty-printed debug string passed to the test.

        @param client: the WiFiClient wrapping the DUT.
        @param ap: the AP configurator about to be tested.

        @returns a string, the pprint rendering of the debug dictionary.

        """
        kernel_ver = self._host.get_kernel_ver()
        firmware_ver = utils.get_firmware_ver(self._host)
        if not firmware_ver:
            firmware_ver = "Unknown"

        debug_dict = {'+++PARSE DATA+++': '+++PARSE DATA+++',
                      'SSID': ap._ssid,
                      'DUT': client.wifi_mac,
                      'AP Info': ap.name,
                      'kernel_version': kernel_ver,
                      'wifi_firmware_version': firmware_ver}
        return pprint.pformat(debug_dict)


    def run(self, job, batch_size=10, tries=10, capturer_hostname=None,
            conn_worker=None, work_client_hostname=None,
            disabled_sysinfo=False):
        """Executes Chaos test.

        Locks a packet capturer and a webdriver VM, then locks, configures
        and tests APs one batch at a time. Afterwards the VM is powered off
        and unlocked and all routers are turned off.

        @param job: an Autotest job object.
        @param batch_size: an integer, max number of APs to lock in one batch.
        @param tries: an integer, number of iterations to run per AP.
        @param capturer_hostname: a string or None, hostname or IP of capturer.
        @param conn_worker: ConnectionWorkerAbstract or None, to run extra
                            work after successful connection.
        @param work_client_hostname: a string or None, hostname of work client
        @param disabled_sysinfo: a bool, disable collection of logs from DUT.


        @raises TestError: Issues locking VM webdriver instance, the packet
                capturer not answering scans, too many networks already in
                the air, or the webdriver tunnel not coming up.
        """

        lock_manager = host_lock_manager.HostLockManager()
        webdriver_master = hosts.SSHHost(MASTERNAME, user='chaosvmmaster')
        host_prefix = self._host.hostname.split('-')[0]
        with host_lock_manager.HostsLockedBy(lock_manager):
            capture_host = utils.allocate_packet_capturer(
                    lock_manager, hostname=capturer_hostname,
                    prefix=host_prefix)
            # Cleanup and reboot packet capturer before the test.
            utils.sanitize_client(capture_host)
            capturer = site_linux_system.LinuxSystem(capture_host, {},
                                                     'packet_capturer')

            # Run iw scan and abort if more than allowed number of APs are up.
            self._check_airspace_is_clear(capture_host)

            if conn_worker is not None:
                work_client_machine = utils.allocate_packet_capturer(
                        lock_manager, hostname=work_client_hostname)
                conn_worker.prepare_work_client(work_client_machine)

            # Lock VM. If on, power off; always power on. Then create a tunnel.
            webdriver_instance = utils.allocate_webdriver_instance(lock_manager)
            self._restart_webdriver_vm(webdriver_master, webdriver_instance)
            # None outside the lab zone, where no tunnel is created; the
            # cleanup below depends on this name always being bound.
            webdriver_tunnel = self._connect_to_webdriver(webdriver_instance)

            batch_locker = ap_batch_locker.ApBatchLocker(
                    lock_manager, self._ap_spec,
                    ap_test_type=ap_constants.AP_TEST_TYPE_CHAOS)

            while batch_locker.has_more_aps():
                # Work around for CrOS devices only:crbug.com/358716
                utils.sanitize_client(self._host)
                healthy_dut = True

                with contextlib.closing(wifi_client.WiFiClient(
                    hosts.create_host(
                            {
                                    'hostname' : self._host.hostname,
                                    'afe_host' : self._host._afe_host,
                                    'host_info_store':
                                            self._host.host_info_store,
                            },
                            host_class=self._host.__class__,
                    ),
                    './debug',
                    False,
                )) as client:

                    aps = batch_locker.get_ap_batch(batch_size=batch_size)
                    if not aps:
                        logging.info('No more APs to test.')
                        break

                    # Power down all of the APs first because some can get
                    # grumpy if they are configured several times and
                    # remain on.
                    utils.power_down_aps(aps, self._broken_pdus)
                    utils.configure_aps(aps, self._ap_spec, self._broken_pdus)

                    aps = utils.filter_quarantined_and_config_failed_aps(aps,
                            batch_locker, job, self._broken_pdus)

                    for ap in aps:
                        # http://crbug.com/306687
                        # Only logged; the AP is still tested below.
                        if ap.ssid is None:
                            logging.error('The SSID was not set for the AP:%s',
                                          ap)

                        healthy_dut = utils.is_dut_healthy(client, ap)

                        if not healthy_dut:
                            logging.error('DUT is not healthy, rebooting.')
                            batch_locker.unlock_and_reclaim_aps()
                            break

                        networks = utils.return_available_networks(
                                ap, capturer, job, self._ap_spec)

                        if networks is None:
                            # If scan returned no networks, iw scan failed.
                            # Reboot the packet capturer device and
                            # reconfigure the capturer.
                            batch_locker.unlock_and_reclaim_ap(ap.host_name)
                            logging.error('Packet capture is not healthy, '
                                          'rebooting.')
                            capturer.host.reboot()
                            capturer = site_linux_system.LinuxSystem(
                                    capture_host, {}, 'packet_capturer')
                            continue
                        if not networks:
                            # Packet capturer did not find the SSID in scan or
                            # there was a security mismatch.
                            utils.release_ap(ap, batch_locker,
                                             self._broken_pdus)
                            continue

                        assoc_params = ap.get_association_parameters()

                        if not utils.is_conn_worker_healthy(
                                conn_worker, ap, assoc_params, job):
                            utils.release_ap(
                                    ap, batch_locker, self._broken_pdus)
                            continue

                        debug_string = self._format_debug_info(client, ap)

                        logging.info('Waiting %d seconds for the AP dhcp '
                                     'server', ap.dhcp_delay)
                        time.sleep(ap.dhcp_delay)

                        # Results are tagged per SSID, prefixed with the
                        # worker name when a connection worker is in use.
                        if conn_worker is None:
                            tag = ap.ssid
                        else:
                            tag = '%s.%s' % (conn_worker.name, ap.ssid)

                        job.run_test(self._test,
                                     capturer=capturer,
                                     capturer_frequency=networks[0].frequency,
                                     capturer_ht_type=networks[0].ht,
                                     host=self._host,
                                     assoc_params=assoc_params,
                                     client=client,
                                     tries=tries,
                                     debug_info=debug_string,
                                     # Copy all logs from the system
                                     disabled_sysinfo=disabled_sysinfo,
                                     conn_worker=conn_worker,
                                     tag=tag)

                        utils.release_ap(ap, batch_locker, self._broken_pdus)

                        if conn_worker is not None:
                            conn_worker.cleanup()

                    if not healthy_dut:
                        # The batch was already unlocked and reclaimed when
                        # the DUT was found unhealthy; skip unlock_aps().
                        continue

                batch_locker.unlock_aps()

            if webdriver_tunnel:
                webdriver_instance.disconnect_ssh_tunnel(webdriver_tunnel,
                                                         WEBDRIVER_PORT)
                webdriver_instance.close()
            capturer.close()
            logging.info('Powering off VM %s', webdriver_instance)
            utils.power_off_VM(webdriver_master, webdriver_instance)
            lock_manager.unlock(webdriver_instance.hostname)

            if self._broken_pdus:
                logging.info('PDU is down!!!\nThe following PDUs are down:\n')
                pprint.pprint(self._broken_pdus)

            factory = ap_configurator_factory.APConfiguratorFactory(
                    ap_constants.AP_TEST_TYPE_CHAOS)
            factory.turn_off_all_routers(self._broken_pdus)
286