# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging, sys, time
from autotest_lib.client.common_lib import error
from autotest_lib.server import autotest
from autotest_lib.server import hosts
from autotest_lib.server import test


class hardware_StorageStress(test.test):
    """
    Integrity stress test for storage device.

    Server-side test that repeatedly runs a client-side FIO test on the DUT
    and runs a power command (reboot / suspend / wait / nothing) between
    the storage operations.
    """
    version = 1

    _HOURS_IN_SEC = 3600
    # Define default value for the test case
    _TEST_GAP = 60 # 1 min
    _TEST_DURATION = 12 * _HOURS_IN_SEC
    _SUSPEND_DURATION = _HOURS_IN_SEC
    _FIO_REQUIREMENT_FILE = '8k_async_randwrite'
    _FIO_WRITE_FLAGS = []
    _FIO_VERIFY_FLAGS = ['--verifyonly']
    _FIO_TEST = 'hardware_StorageFio'

    def run_once(self, client_ip, gap=_TEST_GAP, duration=_TEST_DURATION,
                 power_command='reboot', storage_test_command='integrity',
                 suspend_duration=_SUSPEND_DURATION, storage_test_argument='',
                 cq=False, nonroot_dev=False):
        """
        Run the Storage stress test
        Use hardware_StorageFio to run some test_command repeatedly for a long
        time. Between each iteration of test command, run power command such as
        reboot or suspend.

        @param client_ip:     string of client's ip address (required)
        @param gap:           gap between each test (second) default = 1 min
        @param duration:      duration to run test (second) default = 12 hours
        @param power_command: command to do between each test Command
                              possible command: reboot / suspend / wait /
                              nothing
        @param storage_test_command:  FIO command to run
                              - integrity:  Check data integrity
                              - full_write: Check performance consistency
                                            for full disk write.
        @param suspend_duration: if power_command is suspend, how long the DUT
                              is suspended. If power_command is wait, how long
                              the server sleeps instead.
        @param storage_test_argument: accepted for interface compatibility;
                              this method does not currently read it.
        @param cq:            Indicates that this test is being run as part of
                              the cq. This is not used to test a component for
                              qualification, but to test the storage qual suite
        @param nonroot_dev:   if True, run hardware_StorageFioOther instead of
                              hardware_StorageFio on the client.

        @raises error.TestError: if client_ip is empty.
        @raises error.TestFail: if power_command or storage_test_command is
                              not one of the supported values.
        """

        # in a cq run, do not execute the test, just output
        # the order that the test would have run in
        if cq:
            # Compare by value: identity ('is') on strings is not reliable
            # for values built at runtime.
            label = 'suspend' if power_command == 'suspend' else 'soak'
            self.write_test_keyval(
                {'storage_qual_cq': ('%f hardware_StorageStress_%s'
                                     % (time.time(), label))})
            return

        if nonroot_dev:
            # Shadows the class default on this instance only.
            self._FIO_TEST = 'hardware_StorageFioOther'

        # init test
        if not client_ip:
            raise error.TestError("Must provide client's IP address to test")

        self._client = hosts.create_host(client_ip)
        self._client_at = autotest.Autotest(self._client)
        self._results = {}
        self._suspend_duration = suspend_duration

        # parse power command
        if power_command == 'nothing':
            self._power_func = self._do_nothing
        elif power_command == 'reboot':
            self._power_func = self._do_reboot
        elif power_command == 'suspend':
            self._power_func = self._do_suspend
        elif power_command == 'wait':
            self._power_func = self._do_wait
        else:
            raise error.TestFail(
                'Test failed with error: Invalid power command')

        # Test is doing a lot of disk activity, monitor disk data at each
        # iteration.
        self.job.add_sysinfo_logfile('/var/log/storage_info.txt',
                                     on_every_test=True)

        # parse test command
        if storage_test_command == 'integrity':
            setup_func = self._write_data
            loop_func = self._verify_data
        elif storage_test_command == 'full_write':
            setup_func = self._do_nothing
            loop_func = self._full_disk_write
            # Do at least 2 soak runs. Given the absolute minimum of a loop is
            # around 1h, duration should be at least 1h.
            self._soak_time = min(self._TEST_DURATION, duration / 4)
        else:
            raise error.TestFail('Test failed with error: Invalid test command')

        # init statistic variable
        min_time_per_loop = sys.maxsize
        max_time_per_loop = 0
        all_loop_time = 0
        avr_time_per_loop = 0
        self._loop_count = 0
        setup_func()

        start_time = time.time()

        while time.time() - start_time < duration:
            # sleep
            time.sleep(gap)

            self._loop_count += 1

            # run the storage test (which also runs the power command)
            # & calculate time
            loop_start_time = time.time()
            loop_func()
            loop_time = time.time() - loop_start_time

            # update statistic
            all_loop_time += loop_time
            min_time_per_loop = min(loop_time, min_time_per_loop)
            max_time_per_loop = max(loop_time, max_time_per_loop)

        # Guard against division by zero when the loop never ran.
        if self._loop_count > 0:
            avr_time_per_loop = all_loop_time / self._loop_count

        logging.info('check data count: %d', self._loop_count)

        # report result
        self.write_perf_keyval({'loop_count':self._loop_count})
        self.write_perf_keyval({'min_time_per_loop':min_time_per_loop})
        self.write_perf_keyval({'max_time_per_loop':max_time_per_loop})
        self.write_perf_keyval({'avr_time_per_loop':avr_time_per_loop})

    def _do_nothing(self):
        """
        No-op, used as the 'nothing' power command and as an empty setup step
        """
        pass

    def _do_wait(self):
        """
        Sleep on the server side for the configured suspend duration
        """
        time.sleep(self._suspend_duration)

    def _do_reboot(self):
        """
        Reboot host machine
        """
        self._client.reboot()

    def _do_suspend(self):
        """
        Suspend host machine
        """
        self._client.suspend(suspend_time=self._suspend_duration)

    def _write_data(self):
        """
        Write test data to host using the configured FIO test
        """
        logging.info('_write_data')
        self._client_at.run_test(self._FIO_TEST,
            check_client_result=True, disable_sysinfo=True, wait=0,
            tag='%s_%d' % ('write_data', self._loop_count),
            requirements=[(self._FIO_REQUIREMENT_FILE, self._FIO_WRITE_FLAGS)])

    def _verify_data(self):
        """
        Verify test data using the configured FIO test, then run the power
        command
        """
        logging.info('_verify_data #%d', self._loop_count)
        self._client_at.run_test(self._FIO_TEST,
            check_client_result=True, disable_sysinfo=True, wait=0,
            tag='%s_%d' % ('verify_data', self._loop_count),
            requirements=[(self._FIO_REQUIREMENT_FILE, self._FIO_VERIFY_FLAGS)])
        self._power_func()

    def _full_disk_write(self):
        """
        Do the root device full area write and report performance
        Write random pattern for few hours, then do a write and a verify,
        noting the latency. The power command is run after each FIO stage.
        """
        logging.info('_full_disk_write #%d', self._loop_count)

        # use the default requirement that write different pattern around.
        self._client_at.run_test(self._FIO_TEST,
                                 check_client_result=True,
                                 disable_sysinfo=True,
                                 tag='%s_%d' % ('soak', self._loop_count),
                                 requirements=[('64k_stress', [])],
                                 time_length=self._soak_time)

        self._power_func()

        self._client_at.run_test(self._FIO_TEST,
                                 check_client_result=True,
                                 disable_sysinfo=True,
                                 tag='%s_%d' % ('surf', self._loop_count),
                                 requirements=[('surfing', [])],
                                 time_length=self._soak_time)

        self._power_func()

        self._client_at.run_test(self._FIO_TEST,
                                 check_client_result=True,
                                 disable_sysinfo=True,
                                 tag='%s_%d' % ('integrity', self._loop_count),
                                 wait=0, integrity=True)

        self._power_func()

        self._client_at.run_test('hardware_StorageWearoutDetect',
                                 tag='%s_%d' % ('wearout', self._loop_count),
                                 wait=0, use_cached_result=False)
        # No checkout for wearout (check_client_result is not set), to test
        # device pass their limits.