# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import sys
import time

from autotest_lib.client.common_lib import error
from autotest_lib.server import autotest
from autotest_lib.server import hosts
from autotest_lib.server import test


class hardware_StorageStress(test.test):
    """
    Integrity stress test for storage device

    Server-side test that repeatedly runs a storage workload on the client
    (through the hardware_StorageFio client test) and executes a power
    action (reboot / suspend / wait / nothing) around each iteration.
    """
    version = 1

    _HOURS_IN_SEC = 3600
    # Define default value for the test case
    _TEST_GAP = 60  # 1 min
    _TEST_DURATION = 12 * _HOURS_IN_SEC
    _SUSPEND_DURATION = _HOURS_IN_SEC
    _FIO_REQUIREMENT_FILE = '8k_async_randwrite'
    _FIO_WRITE_FLAGS = []
    _FIO_VERIFY_FLAGS = ['--verifyonly']

    def run_once(self, client_ip, gap=_TEST_GAP, duration=_TEST_DURATION,
                 power_command='reboot', storage_test_command='integrity',
                 suspend_duration=_SUSPEND_DURATION, storage_test_argument='',
                 cq=False):
        """
        Run the Storage stress test
        Use hardware_StorageFio to run some test_command repeatedly for a
        long time. Between each iteration of test command, run power command
        such as reboot or suspend.

        @param client_ip: string of client's ip address (required)
        @param gap: gap between each test (second) default = 1 min
        @param duration: duration to run test (second) default = 12 hours
        @param power_command: command to do between each test Command
                              possible command: reboot / suspend / wait /
                              nothing
        @param storage_test_command: FIO command to run
                              - integrity: Check data integrity
                              - full_write: Check performance consistency
                                for full disk write. Use argument
                                to determine which disk to write
        @param suspend_duration: if power_command is suspend, how long the DUT
                              is suspended. Also used as the sleep time when
                              power_command is wait.
        @param storage_test_argument: accepted for interface compatibility;
                              not read by this method.
        @param cq: Indicates that this test is being run as part of
                   the cq. This is not used to test a component for
                   qualification, but to test the storage qual suite

        @raises error.TestError: if client_ip is empty.
        @raises error.TestFail: if power_command or storage_test_command is
                                not one of the supported values.
        """

        # in a cq run, do not execute the test, just output
        # the order that the test would have run in
        if cq:
            # Compare by value: identity ('is') on strings only works by
            # accident of interning.
            label = 'suspend' if power_command == 'suspend' else 'soak'
            self.write_test_keyval(
                {'storage_qual_cq': ('%f hardware_StorageStress_%s'
                                     % (time.time(), label))})
            return

        # init test
        if not client_ip:
            # The exception must be raised, not merely constructed.
            raise error.TestError("Must provide client's IP address to test")

        self._client = hosts.create_host(client_ip)
        self._client_at = autotest.Autotest(self._client)
        self._results = {}
        self._suspend_duration = suspend_duration

        # parse power command
        if power_command == 'nothing':
            self._power_func = self._do_nothing
        elif power_command == 'reboot':
            self._power_func = self._do_reboot
        elif power_command == 'suspend':
            self._power_func = self._do_suspend
        elif power_command == 'wait':
            self._power_func = self._do_wait
        else:
            raise error.TestFail(
                'Test failed with error: Invalid power command')

        # Test is doing a lot of disk activity, monitor disk data at each
        # iteration.
        self.job.add_sysinfo_logfile('/var/log/storage_info.txt',
                                     on_every_test=True)

        # parse test command
        if storage_test_command == 'integrity':
            setup_func = self._write_data
            loop_func = self._verify_data
        elif storage_test_command == 'full_write':
            setup_func = self._do_nothing
            loop_func = self._full_disk_write
            # Do at least 2 soak runs. Given the absolute minimum of a loop is
            # around 1h, duration should be at least 1h.
            # Each loop runs two timed FIO passes (soak and surf) of
            # _soak_time each, hence the division by 4.
            self._soak_time = min(self._TEST_DURATION, duration / 4)
        else:
            raise error.TestFail('Test failed with error: Invalid test command')

        # init statistic variable
        min_time_per_loop = sys.maxsize
        max_time_per_loop = 0
        all_loop_time = 0
        avr_time_per_loop = 0
        self._loop_count = 0
        setup_func()

        start_time = time.time()

        while time.time() - start_time < duration:
            # sleep
            time.sleep(gap)

            self._loop_count += 1

            # do power command & verify data & calculate time
            loop_start_time = time.time()
            loop_func()
            loop_time = time.time() - loop_start_time

            # update statistic
            all_loop_time += loop_time
            min_time_per_loop = min(loop_time, min_time_per_loop)
            max_time_per_loop = max(loop_time, max_time_per_loop)

        # Guard against division by zero when the loop never ran.
        if self._loop_count > 0:
            avr_time_per_loop = all_loop_time / self._loop_count

        logging.info('check data count: %d', self._loop_count)

        # report result
        self.write_perf_keyval({'loop_count': self._loop_count})
        self.write_perf_keyval({'min_time_per_loop': min_time_per_loop})
        self.write_perf_keyval({'max_time_per_loop': max_time_per_loop})
        self.write_perf_keyval({'avr_time_per_loop': avr_time_per_loop})

    def _do_nothing(self):
        """
        No-op, used as the power action or setup step when none is wanted
        """
        pass

    def _do_wait(self):
        """
        Sleep on the server side for the configured suspend duration
        """
        time.sleep(self._suspend_duration)

    def _do_reboot(self):
        """
        Reboot host machine
        """
        self._client.reboot()

    def _do_suspend(self):
        """
        Suspend host machine
        """
        self._client.suspend(suspend_time=self._suspend_duration)

    def _write_data(self):
        """
        Write test data to host using hardware_StorageFio
        """
        logging.info('_write_data')
        self._client_at.run_test(
            'hardware_StorageFio',
            check_client_result=True, disable_sysinfo=True, wait=0,
            tag='%s_%d' % ('write_data', self._loop_count),
            requirements=[(self._FIO_REQUIREMENT_FILE,
                           self._FIO_WRITE_FLAGS)])

    def _verify_data(self):
        """
        Verify test data using hardware_StorageFio, then run the power action
        """
        logging.info('_verify_data #%d', self._loop_count)
        self._client_at.run_test(
            'hardware_StorageFio',
            check_client_result=True, disable_sysinfo=True, wait=0,
            tag='%s_%d' % ('verify_data', self._loop_count),
            requirements=[(self._FIO_REQUIREMENT_FILE,
                           self._FIO_VERIFY_FLAGS)])
        self._power_func()

    def _full_disk_write(self):
        """
        Do the root device full area write and report performance
        Write random pattern for few hours, then do a write and a verify,
        noting the latency.

        The power action is executed after each of the three FIO phases
        (soak, surf, integrity), before the wearout check.
        """
        logging.info('_full_disk_write #%d', self._loop_count)

        # use the default requirement that write different pattern around.
        self._client_at.run_test(
            'hardware_StorageFio',
            check_client_result=True,
            disable_sysinfo=True,
            tag='%s_%d' % ('soak', self._loop_count),
            requirements=[('64k_stress', [])],
            time_length=self._soak_time)

        self._power_func()

        self._client_at.run_test(
            'hardware_StorageFio',
            check_client_result=True,
            disable_sysinfo=True,
            tag='%s_%d' % ('surf', self._loop_count),
            requirements=[('surfing', [])],
            time_length=self._soak_time)

        self._power_func()

        self._client_at.run_test(
            'hardware_StorageFio',
            check_client_result=True,
            disable_sysinfo=True,
            tag='%s_%d' % ('integrity', self._loop_count),
            wait=0, integrity=True)

        self._power_func()

        # No checkout for wearout, to test device pass their limits.
        self._client_at.run_test(
            'hardware_StorageWearoutDetect',
            tag='%s_%d' % ('wearout', self._loop_count),
            wait=0, use_cached_result=False)