1#!/usr/bin/env python2
2#
# Copyright 2015 Google Inc.  All Rights Reserved.
4"""This module controls locking and unlocking of test machines."""
5
6from __future__ import print_function
7
8import argparse
9import getpass
10import os
11import sys
12import traceback
13
14from cros_utils import logger
15from cros_utils import machines
16
17
class AFELockException(Exception):
  """Base class for all exceptions raised by this module."""
20
21
class MachineNotPingable(AFELockException):
  """Raised when a machine does not respond to ping."""
24
25
class MissingHostInfo(AFELockException):
  """Raised when info about a machine cannot be found on the machine servers."""
28
29
class UpdateNonLocalMachine(AFELockException):
  """Raised when user requests to add/remove a ChromeOS HW Lab machine."""
32
33
class DuplicateAdd(AFELockException):
  """Raised when user requests to add a machine that is already on the server."""
36
37
class UpdateServerError(AFELockException):
  """Raised when an attempt to add/remove a machine on the local server fails."""
40
41
class LockingError(AFELockException):
  """Raised when a server fails to lock/unlock a machine as requested."""
44
45
class DontOwnLock(AFELockException):
  """Raised when user attempts to unlock a machine locked by someone else."""
  # This should not be raised if the user specified '--force'
49
50
class NoAFEServer(AFELockException):
  """Raised when a required autotest (AFE) server cannot be found/accessed."""
53
54
class AFEAccessError(AFELockException):
  """Raised when an AFE server returns no information about a machine."""
57
58
59class AFELockManager(object):
  """Class for locking/unlocking machines via Autotest Front End servers.
61
62  This class contains methods for checking the locked status of machines
63  on both the ChromeOS HW Lab AFE server and a local AFE server.  It also
64  has methods for adding/removing machines from the local server, and for
65  changing the lock status of machines on either server.  For the ChromeOS
66  HW Lab, it only allows access to the toolchain team lab machines, as
67  defined in toolchain-utils/crosperf/default_remotes.  By default it will
68  look for a local server on chrotomation2.svl.corp.google.com, but an
69  alternative local AFE server can be supplied, if desired.
70
71  !!!IMPORTANT NOTE!!!  The AFE server can only be called from the main
72  thread/process of a program.  If you launch threads and try to call it
73  from a thread, you will get an error.  This has to do with restrictions
74  in the Python virtual machine (and signal handling) and cannot be changed.
75  """
76
77  LOCAL_SERVER = 'chrotomation2.svl.corp.google.com'
78
79  def __init__(self,
80               remotes,
81               force_option,
82               chromeos_root,
83               local_server,
84               use_local=True,
85               log=None):
86    """Initializes an AFELockManager object.
87
88    Args:
89      remotes: A list of machine names or ip addresses to be managed.  Names
90        and ip addresses should be represented as strings.  If the list is
91        empty, the lock manager will get all known machines.
92      force_option: A Boolean indicating whether or not to force an unlock of
93        a machine that was locked by someone else.
94      chromeos_root: The ChromeOS chroot to use for the autotest scripts.
95      local_server: A string containing the name or ip address of the machine
96        that is running an AFE server, which is to be used for managing
97        machines that are not in the ChromeOS HW lab.
98      local: A Boolean indicating whether or not to use/allow a local AFE
99        server to be used (see local_server argument).
100      use_local: Use the local server instead of the official one.
101      log: If not None, this is the logger object to be used for writing out
102        informational output messages.  It is expected to be an instance of
103        Logger class from cros_utils/logger.py.
104    """
105    self.chromeos_root = chromeos_root
106    self.user = getpass.getuser()
107    self.logger = log or logger.GetLogger()
108    autotest_path = os.path.join(chromeos_root,
109                                 'src/third_party/autotest/files')
110
111    sys.path.append(chromeos_root)
112    sys.path.append(autotest_path)
113    sys.path.append(os.path.join(autotest_path, 'server', 'cros'))
114
115    # We have to wait to do these imports until the paths above have
116    # been fixed.
117    # pylint: disable=import-error
118    from client import setup_modules
119    setup_modules.setup(
120        base_path=autotest_path, root_module_name='autotest_lib')
121
122    from dynamic_suite import frontend_wrappers
123
124    self.afe = frontend_wrappers.RetryingAFE(
125        timeout_min=30, delay_sec=10, debug=False, server='cautotest')
126
127    self.local = use_local
128    self.machines = list(set(remotes)) or []
129    self.toolchain_lab_machines = self.GetAllToolchainLabMachines()
130    if self.machines and self.AllLabMachines():
131      self.local = False
132
133    if not self.local:
134      self.local_afe = None
135    else:
136      dargs = {}
137      dargs['server'] = local_server or AFELockManager.LOCAL_SERVER
138      # Make sure local server is pingable.
139      error_msg = ('Local autotest server machine %s not responding to ping.' %
140                   dargs['server'])
141      self.CheckMachine(dargs['server'], error_msg)
142      self.local_afe = frontend_wrappers.RetryingAFE(
143          timeout_min=30, delay_sec=10, debug=False, **dargs)
144    if not self.machines:
145      self.machines = self.toolchain_lab_machines + self.GetAllNonlabMachines()
146    self.force = force_option
147
148  def AllLabMachines(self):
149    """Check to see if all machines being used are HW Lab machines."""
150    all_lab = True
151    for m in self.machines:
152      if m not in self.toolchain_lab_machines:
153        all_lab = False
154        break
155    return all_lab
156
157  def CheckMachine(self, machine, error_msg):
158    """Verifies that machine is responding to ping.
159
160    Args:
161      machine: String containing the name or ip address of machine to check.
162      error_msg: Message to print if ping fails.
163
164    Raises:
165      MachineNotPingable:  If machine is not responding to 'ping'
166    """
167    if not machines.MachineIsPingable(machine, logging_level='none'):
168      cros_machine = machine + '.cros'
169      if not machines.MachineIsPingable(cros_machine, logging_level='none'):
170        raise MachineNotPingable(error_msg)
171
172  def MachineIsKnown(self, machine):
173    """Checks to see if either AFE server knows the given machine.
174
175    Args:
176      machine: String containing name or ip address of machine to check.
177
178    Returns:
179      Boolean indicating if the machine is in the list of known machines for
180        either AFE server.
181    """
182    if machine in self.toolchain_lab_machines:
183      return True
184    elif self.local_afe and machine in self.GetAllNonlabMachines():
185      return True
186
187    return False
188
189  def GetAllToolchainLabMachines(self):
190    """Gets a list of all the toolchain machines in the ChromeOS HW lab.
191
192    Returns:
193      A list of names of the toolchain machines in the ChromeOS HW lab.
194    """
195    machines_file = os.path.join(
196        os.path.dirname(__file__), 'crosperf', 'default_remotes')
197    machine_list = []
198    with open(machines_file, 'r') as input_file:
199      lines = input_file.readlines()
200      for line in lines:
201        _, remotes = line.split(':')
202        remotes = remotes.strip()
203        for r in remotes.split():
204          machine_list.append(r.strip())
205    return machine_list
206
207  def GetAllNonlabMachines(self):
208    """Gets a list of all known machines on the local AFE server.
209
210    Returns:
211      A list of the names of the machines on the local AFE server.
212    """
213    non_lab_machines = []
214    if self.local_afe:
215      non_lab_machines = self.local_afe.get_hostnames()
216    return non_lab_machines
217
218  def PrintStatusHeader(self, is_lab_machine):
219    """Prints the status header lines for machines.
220
221    Args:
222      is_lab_machine: Boolean indicating whether to print HW Lab header or
223        local machine header (different spacing).
224    """
225    if is_lab_machine:
226      print('\nMachine (Board)\t\t\t\t\tStatus')
227      print('---------------\t\t\t\t\t------\n')
228    else:
229      print('\nMachine (Board)\t\tStatus')
230      print('---------------\t\t------\n')
231
232  def RemoveLocalMachine(self, m):
233    """Removes a machine from the local AFE server.
234
235    Args:
236      m: The machine to remove.
237
238    Raises:
239      MissingHostInfo:  Can't find machine to be removed.
240    """
241    if self.local_afe:
242      host_info = self.local_afe.get_hosts(hostname=m)
243      if host_info:
244        host_info = host_info[0]
245        host_info.delete()
246      else:
247        raise MissingHostInfo('Cannot find/delete machine %s.' % m)
248
249  def AddLocalMachine(self, m):
250    """Adds a machine to the local AFE server.
251
252    Args:
253      m: The machine to be added.
254    """
255    if self.local_afe:
256      error_msg = 'Machine %s is not responding to ping.' % m
257      self.CheckMachine(m, error_msg)
258      self.local_afe.create_host(m)
259
260  def AddMachinesToLocalServer(self):
261    """Adds one or more machines to the local AFE server.
262
263    Verify that the requested machines are legal to add to the local server,
264    i.e. that they are not ChromeOS HW lab machines, and they are not already
265    on the local server.  Call AddLocalMachine for each valid machine.
266
267    Raises:
268      DuplicateAdd: Attempt to add a machine that is already on the server.
269      UpdateNonLocalMachine:  Attempt to add a ChromeOS HW lab machine.
270      UpdateServerError:  Something went wrong while attempting to add a
271        machine.
272    """
273    for m in self.machines:
274      for cros_name in [m, m + '.cros']:
275        if cros_name in self.toolchain_lab_machines:
276          raise UpdateNonLocalMachine(
277              'Machine %s is already in the ChromeOS HW'
278              'Lab.  Cannot add it to local server.' % cros_name)
279      host_info = self.local_afe.get_hosts(hostname=m)
280      if host_info:
281        raise DuplicateAdd('Machine %s is already on the local server.' % m)
282      try:
283        self.AddLocalMachine(m)
284        self.logger.LogOutput('Successfully added %s to local server.' % m)
285      except Exception as e:
286        traceback.print_exc()
287        raise UpdateServerError(
288            'Error occurred while attempting to add %s. %s' % (m, str(e)))
289
290  def RemoveMachinesFromLocalServer(self):
291    """Removes one or more machines from the local AFE server.
292
293    Verify that the requested machines are legal to remove from the local
294    server, i.e. that they are not ChromeOS HW lab machines.  Call
295    RemoveLocalMachine for each valid machine.
296
297    Raises:
298      UpdateServerError:  Something went wrong while attempting to remove a
299        machine.
300    """
301    for m in self.machines:
302      for cros_name in [m, m + '.cros']:
303        if cros_name in self.toolchain_lab_machines:
304          raise UpdateNonLocalMachine(
305              'Machine %s is in the ChromeOS HW Lab. '
306              'This script cannot remove lab machines.' % cros_name)
307      try:
308        self.RemoveLocalMachine(m)
309        self.logger.LogOutput('Successfully removed %s from local server.' % m)
310      except Exception as e:
311        traceback.print_exc()
312        raise UpdateServerError('Error occurred while attempting to remove %s '
313                                '(%s).' % (m, str(e)))
314
315  def ListMachineStates(self, machine_states):
316    """Gets and prints the current status for a list of machines.
317
318    Prints out the current status for all of the machines in the current
319    AFELockManager's list of machines (set when the object is initialized).
320
321    Args:
322      machine_states: A dictionary of the current state of every machine in
323        the current AFELockManager's list of machines.  Normally obtained by
324        calling AFELockManager::GetMachineStates.
325    """
326    local_machines = []
327    printed_hdr = False
328    for m in machine_states:
329      cros_name = m + '.cros'
330      if (m in self.toolchain_lab_machines or
331          cros_name in self.toolchain_lab_machines):
332        name = m if m in self.toolchain_lab_machines else cros_name
333        if not printed_hdr:
334          self.PrintStatusHeader(True)
335          printed_hdr = True
336        state = machine_states[m]
337        if state['locked']:
338          print('%s (%s)\tlocked by %s since %s' %
339                (name, state['board'], state['locked_by'], state['lock_time']))
340        else:
341          print('%s (%s)\tunlocked' % (name, state['board']))
342      else:
343        local_machines.append(m)
344
345    if local_machines:
346      self.PrintStatusHeader(False)
347      for m in local_machines:
348        state = machine_states[m]
349        if state['locked']:
350          print('%s (%s)\tlocked by %s since %s' %
351                (m, state['board'], state['locked_by'], state['lock_time']))
352        else:
353          print('%s (%s)\tunlocked' % (m, state['board']))
354
355  def UpdateLockInAFE(self, should_lock_machine, machine):
356    """Calls an AFE server to lock/unlock a machine.
357
358    Args:
359      should_lock_machine: Boolean indicating whether to lock the machine (True)
360        or unlock the machine (False).
361      machine: The machine to update.
362
363    Raises:
364      LockingError:  An error occurred while attempting to update the machine
365        state.
366    """
367    action = 'lock'
368    if not should_lock_machine:
369      action = 'unlock'
370    kwargs = {'locked': should_lock_machine}
371    kwargs['lock_reason'] = 'toolchain user request (%s)' % self.user
372
373    cros_name = machine + '.cros'
374    if cros_name in self.toolchain_lab_machines:
375      machine = cros_name
376    if machine in self.toolchain_lab_machines:
377      m = machine.split('.')[0]
378      afe_server = self.afe
379    else:
380      m = machine
381      afe_server = self.local_afe
382
383    try:
384      afe_server.run(
385          'modify_hosts',
386          host_filter_data={'hostname__in': [m]},
387          update_data=kwargs)
388    except Exception as e:
389      traceback.print_exc()
390      raise LockingError('Unable to %s machine %s. %s' % (action, m, str(e)))
391
392  def UpdateMachines(self, lock_machines):
393    """Sets the locked state of the machines to the requested value.
394
395    The machines updated are the ones in self.machines (specified when the
396    class object was intialized).
397
398    Args:
399      lock_machines: Boolean indicating whether to lock the machines (True) or
400        unlock the machines (False).
401
402    Returns:
403      A list of the machines whose state was successfully updated.
404    """
405    updated_machines = []
406    for m in self.machines:
407      self.UpdateLockInAFE(lock_machines, m)
408      # Since we returned from self.UpdateLockInAFE we assume the request
409      # succeeded.
410      if lock_machines:
411        self.logger.LogOutput('Locked machine(s) %s.' % m)
412      else:
413        self.logger.LogOutput('Unlocked machine(s) %s.' % m)
414      updated_machines.append(m)
415
416    return updated_machines
417
418  def _InternalRemoveMachine(self, machine):
419    """Remove machine from internal list of machines.
420
421    Args:
422      machine: Name of machine to be removed from internal list.
423    """
424    # Check to see if machine is lab machine and if so, make sure it has
425    # ".cros" on the end.
426    cros_machine = machine
427    if machine.find('rack') > 0 and machine.find('row') > 0:
428      if machine.find('.cros') == -1:
429        cros_machine = cros_machine + '.cros'
430
431    self.machines = [
432        m for m in self.machines if m != cros_machine and m != machine
433    ]
434
435  def CheckMachineLocks(self, machine_states, cmd):
436    """Check that every machine in requested list is in the proper state.
437
438    If the cmd is 'unlock' verify that every machine is locked by requestor.
439    If the cmd is 'lock' verify that every machine is currently unlocked.
440
441    Args:
442      machine_states: A dictionary of the current state of every machine in
443        the current AFELockManager's list of machines.  Normally obtained by
444        calling AFELockManager::GetMachineStates.
445      cmd: The user-requested action for the machines: 'lock' or 'unlock'.
446
447    Raises:
448      DontOwnLock: The lock on a requested machine is owned by someone else.
449    """
450    for k, state in machine_states.iteritems():
451      if cmd == 'unlock':
452        if not state['locked']:
453          self.logger.LogWarning('Attempt to unlock already unlocked machine '
454                                 '(%s).' % k)
455          self._InternalRemoveMachine(k)
456
457        if state['locked'] and state['locked_by'] != self.user:
458          raise DontOwnLock('Attempt to unlock machine (%s) locked by someone '
459                            'else (%s).' % (k, state['locked_by']))
460      elif cmd == 'lock':
461        if state['locked']:
462          self.logger.LogWarning(
463              'Attempt to lock already locked machine (%s)' % k)
464          self._InternalRemoveMachine(k)
465
466  def HasAFEServer(self, local):
467    """Verifies that the AFELockManager has appropriate AFE server.
468
469    Args:
470      local: Boolean indicating whether we are checking for the local server
471        (True) or for the global server (False).
472
473    Returns:
474      A boolean indicating if the AFELockManager has the requested AFE server.
475    """
476    if local:
477      return self.local_afe is not None
478    else:
479      return self.afe is not None
480
481  def GetMachineStates(self, cmd=''):
482    """Gets the current state of all the requested machines.
483
484    Gets the current state of all the requested machines, both from the HW lab
485    sever and from the local server.  Stores the data in a dictionary keyed
486    by machine name.
487
488    Args:
489      cmd: The command for which we are getting the machine states. This is
490        important because if one of the requested machines is missing we raise
491        an exception, unless the requested command is 'add'.
492
493    Returns:
494      A dictionary of machine states for all the machines in the AFELockManager
495      object.
496
497    Raises:
498      NoAFEServer:  Cannot find the HW Lab or local AFE server.
499      AFEAccessError:  An error occurred when querying the server about a
500        machine.
501    """
502    if not self.HasAFEServer(False):
503      raise NoAFEServer('Error: Cannot connect to main AFE server.')
504
505    if self.local and not self.HasAFEServer(True):
506      raise NoAFEServer('Error: Cannot connect to local AFE server.')
507
508    machine_list = {}
509    for m in self.machines:
510      host_info = None
511      cros_name = m + '.cros'
512      if (m in self.toolchain_lab_machines or
513          cros_name in self.toolchain_lab_machines):
514        mod_host = m.split('.')[0]
515        host_info = self.afe.get_hosts(hostname=mod_host)
516        if not host_info:
517          raise AFEAccessError('Unable to get information about %s from main'
518                               ' autotest server.' % m)
519      else:
520        host_info = self.local_afe.get_hosts(hostname=m)
521        if not host_info and cmd != 'add':
522          raise AFEAccessError('Unable to get information about %s from '
523                               'local autotest server.' % m)
524      if host_info:
525        host_info = host_info[0]
526        name = host_info.hostname
527        values = {}
528        values['board'] = host_info.platform if host_info.platform else '??'
529        values['locked'] = host_info.locked
530        if host_info.locked:
531          values['locked_by'] = host_info.locked_by
532          values['lock_time'] = host_info.lock_time
533        else:
534          values['locked_by'] = ''
535          values['lock_time'] = ''
536        machine_list[name] = values
537      else:
538        machine_list[m] = {}
539    return machine_list
540
541
def _BuildArgumentParser():
  """Creates the command line parser for this script.

  All operation flags (--list, --status, --lock, --unlock, --add_machine,
  --remove_machine) store a constant into the same destination, 'cmd'.

  Returns:
    The configured argparse.ArgumentParser.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument(
      '--list',
      dest='cmd',
      action='store_const',
      const='status',
      help='List current status of all known machines.')
  parser.add_argument(
      '--lock',
      dest='cmd',
      action='store_const',
      const='lock',
      help='Lock given machine(s).')
  parser.add_argument(
      '--unlock',
      dest='cmd',
      action='store_const',
      const='unlock',
      help='Unlock given machine(s).')
  parser.add_argument(
      '--status',
      dest='cmd',
      action='store_const',
      const='status',
      help='List current status of given machine(s).')
  parser.add_argument(
      '--add_machine',
      dest='cmd',
      action='store_const',
      const='add',
      help='Add machine to local machine server.')
  parser.add_argument(
      '--remove_machine',
      dest='cmd',
      action='store_const',
      const='remove',
      help='Remove machine from the local machine server.')
  parser.add_argument(
      '--nolocal',
      dest='local',
      action='store_false',
      default=True,
      help='Do not try to use local machine server.')
  parser.add_argument(
      '--remote', dest='remote', help='machines on which to operate')
  parser.add_argument(
      '--chromeos_root',
      dest='chromeos_root',
      required=True,
      help='ChromeOS root to use for autotest scripts.')
  parser.add_argument(
      '--local_server',
      dest='local_server',
      default=None,
      help='Alternate local autotest server to use.')
  parser.add_argument(
      '--force',
      dest='force',
      action='store_true',
      default=False,
      help='Force lock/unlock of machines, even if not'
      ' current lock owner.')
  return parser


def Main(argv):
  """Parse the options, initialize lock manager and dispatch proper method.

  Invalid command lines are reported through the parser's error() method,
  which exits the process.

  Args:
    argv: The options with which this script was invoked.

  Returns:
    0 unless an exception is raised.
  """
  parser = _BuildArgumentParser()
  options = parser.parse_args(argv)

  # Check for a missing operation first, so that case is not misreported as
  # "no machines specified".
  if not options.cmd:
    parser.error('No operation selected (--list, --status, --lock, --unlock,'
                 ' --add_machine, --remove_machine).')

  if not options.remote and options.cmd != 'status':
    parser.error('No machines specified for operation.')

  if not os.path.isdir(options.chromeos_root):
    parser.error('Cannot find chromeos_root: %s.' % options.chromeos_root)

  # --remote is a single whitespace-separated string of machine names.
  machine_list = options.remote.split() if options.remote else []

  lock_manager = AFELockManager(machine_list, options.force,
                                options.chromeos_root, options.local_server,
                                options.local)

  cmd = options.cmd
  machine_states = lock_manager.GetMachineStates(cmd=cmd)

  if cmd == 'status':
    lock_manager.ListMachineStates(machine_states)

  elif cmd == 'lock':
    # --force only skips the ownership/state check; the update itself must
    # still happen.
    if not lock_manager.force:
      lock_manager.CheckMachineLocks(machine_states, cmd)
    lock_manager.UpdateMachines(True)

  elif cmd == 'unlock':
    if not lock_manager.force:
      lock_manager.CheckMachineLocks(machine_states, cmd)
    lock_manager.UpdateMachines(False)

  elif cmd == 'add':
    lock_manager.AddMachinesToLocalServer()

  elif cmd == 'remove':
    lock_manager.RemoveMachinesFromLocalServer()

  return 0
658
659
if __name__ == '__main__':
  # Use Main's return value as the process exit status.
  exit_status = Main(sys.argv[1:])
  sys.exit(exit_status)
662