"""This class defines the Remote host class."""

import logging
import os
import time
import urllib

from autotest_lib.client.common_lib import error
from autotest_lib.server import utils
from autotest_lib.server.hosts import base_classes


class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Default time to wait for the host to go down in halt(), in seconds.
    DEFAULT_HALT_TIMEOUT = 2 * 60
    # Callables invoked as label_function(host) by get_labels().
    _LABEL_FUNCTIONS = []
    _DETECTABLE_LABELS = []

    # Destination on the remote host for the copy of /var/log/messages
    # taken by job_start().
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"


    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Set up the per-host state after delegating to the parent class.

        @param hostname Name of the remote host.
        @param autodir Autotest installation directory on the host, if
                already known.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Temporary directories handed out by get_tmp_dir(); removed again
        # in close().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Close the host, stop its loggers and remove, on a best-effort
        basis, every temporary directory handed out by get_tmp_dir().
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is only set by _initialize(); tolerate objects on which
        # it never ran.
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError:
                    # Cleanup is best effort; a failed removal must not
                    # make close() fail.
                    pass


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            cmd = ('test ! -e /var/log/messages || cp -f /var/log/messages '
                   '%s') % self.VAR_LOG_MESSAGES_COPY_PATH
            self.run(cmd)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest installation directory recorded for the host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """Trigger a reboot by writing 'b' to /proc/sysrq-trigger."""
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Shut down the remote host.

        N.B.  This method makes no provision to bring the target back
        up.  The target will be offline indefinitely if there's no
        independent hardware (servo, RPM, etc.) to force the target to
        power on.

        @param timeout  Maximum time to wait for host down, in seconds.
        @param wait  Whether to wait for the host to go offline.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, wait=True,
               fastsync=False, reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                wait - Should we wait to see if the machine comes back up.
                fastsync - Don't wait for the sync to complete, just start one
                        and move on. This is for cases where rebooting promptly
                        is more important than data integrity and/or the
                        machine may have disks that cause sync to never return.
                reboot_cmd - Reboot command to execute.
                dargs - Extra keyword arguments, forwarded to reboot_setup()
                        and, when waiting, to wait_for_restart().
        """
        self.reboot_setup(**dargs)
        if not reboot_cmd:
            # Escalating fallback chain: each step is only reached if the
            # host is still alive after the previous one.
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            # pylint: disable=missing-docstring
            self.record("GOOD", None, "reboot.start")
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd, **dargs):
        """
        Suspend the remote host.

        Args:
                timeout - How long to wait for the suspend.
                suspend_cmd - suspend command to execute.

        Raises:
                error.AutoservSuspendError - if the suspend command fails, if
                        the host does not answer a ping once the wait is over,
                        or if the whole operation took less than `timeout`
                        seconds.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            # pylint: disable=missing-docstring
            self.record("GOOD", None,
                        "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll more often once the expected wake-up time is near.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        if lasted < timeout:
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent reboot follow-up, then notify the job's profilers
        of the reboot when a job is attached to this host.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.
        """
        def op_func():
            # pylint: disable=missing-docstring
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent cleanup, then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory is recorded in self.tmp_dirs so that close() can
        remove it together with its content.

        @param parent Directory on the host in which to create the
                temporary directory; created if missing.
        """
        self.run("mkdir -p %s" % parent)
        template = os.path.join(parent, 'autoserv-XXXXXX')
        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def _read_host_keyvals(self):
        """
        Read this host's keyval file from the job results directory.

        Callers must make sure self.job is set before calling this.
        """
        keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                   self.hostname)
        return utils.read_keyval(keyval_path)


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.
        """
        if not self.job:
            return None
        return self._read_host_keyvals().get('platform', None)


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.

        Labels are stored as one comma-separated, URL-quoted string under
        the 'labels' keyval; each entry is unquoted before being returned.
        """
        if not self.job:
            return []
        all_labels = self._read_host_keyvals().get('labels', '')
        if not all_labels:
            return []
        return [urllib.unquote(label) for label in all_labels.split(',')]


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.

        @param tmpdir The directory to delete.

        @raises ValueError if tmpdir is not currently recorded in
                self.tmp_dirs (i.e. was not obtained from get_tmp_dir()
                or was already deleted).
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        self.tmp_dirs.remove(tmpdir)


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        @returns The first field of /proc/uptime, as a string.

        @raises error.AutoservHostError if the host does not appear to
                be up.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        # NOTE(review): the second positional argument is presumably the
        # run() timeout in seconds -- confirm against run()'s signature.
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. Return True if it exists.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running, or if no
        waitup processes are configured at all; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            # Fall back to a bare `ps` when `ps -e` fails on the host.
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False


    def get_labels(self):
        """Return a list of labels for this given host.

        This is the main way to retrieve all the automatic labels for a host
        as it will run through all the currently implemented label functions.

        A label function may return a single string or a list of strings;
        falsy results and results of any other type are ignored. A label
        function that raises is logged and skipped.
        """
        labels = []
        for label_function in self._LABEL_FUNCTIONS:
            try:
                label = label_function(self)
            except Exception as e:
                logging.error('Label function %s failed; ignoring it.',
                              label_function.__name__)
                logging.exception(e)
                label = None
            if label:
                if isinstance(label, str):
                    labels.append(label)
                elif isinstance(label, list):
                    labels.extend(label)
        return labels