• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python2
2#
# Copyright 2015 Google Inc.  All Rights Reserved.
4"""This module controls locking and unlocking of test machines."""
5
6from __future__ import print_function
7
8import argparse
9import getpass
10import os
11import sys
12import traceback
13
14from cros_utils import logger
15from cros_utils import machines
16
17
class AFELockException(Exception):
  """Common ancestor of every exception defined by this module."""
20
21
class MachineNotPingable(AFELockException):
  """Signals that a machine did not answer a ping request."""
24
25
class MissingHostInfo(AFELockException):
  """Signals that the machine servers hold no information on a machine."""
28
29
class UpdateNonLocalMachine(AFELockException):
  """Signals a request to add/remove a ChromeOS HW Lab machine."""
32
33
class DuplicateAdd(AFELockException):
  """Signals a request to add a machine the server already knows about."""
36
37
class UpdateServerError(AFELockException):
  """Signals that adding/removing a machine on the local server failed."""
40
41
class LockingError(AFELockException):
  """Signals that the server could not lock/unlock a machine as asked."""
44
45
class DontOwnLock(AFELockException):
  """Signals an attempt to unlock a machine that someone else has locked.

  This should not be raised if the user specified '--force'.
  """
49
50
class NoAFEServer(AFELockException):
  """Signals that the autotest server cannot be found or accessed."""
53
54
class AFEAccessError(AFELockException):
  """Signals that the lab server returned no information on a lab machine."""
57
58
class AFELockManager(object):
  """Class for locking/unlocking machines via Autotest Front End servers.

  This class contains methods for checking the locked status of machines
  on both the ChromeOS HW Lab AFE server and a local AFE server.  It also
  has methods for adding/removing machines from the local server, and for
  changing the lock status of machines on either server.  For the ChromeOS
  HW Lab, it only allows access to the toolchain team lab machines, as
  defined in toolchain-utils/crosperf/default_remotes.  By default it will
  look for a local server on chrotomation2.mtv.corp.google.com, but an
  alternative local AFE server can be supplied, if desired.

  !!!IMPORTANT NOTE!!!  The AFE server can only be called from the main
  thread/process of a program.  If you launch threads and try to call it
  from a thread, you will get an error.  This has to do with restrictions
  in the Python virtual machine (and signal handling) and cannot be changed.
  """

  # Local AFE server used when the caller does not supply one.
  LOCAL_SERVER = 'chrotomation2.mtv.corp.google.com'

  def __init__(self,
               remotes,
               force_option,
               chromeos_root,
               local_server,
               use_local=True,
               log=None):
    """Initializes an AFELockManager object.

    Args:
      remotes: A list of machine names or ip addresses to be managed.  Names
        and ip addresses should be represented as strings.  If the list is
        empty (or None), the lock manager will get all known machines.
      force_option: A Boolean indicating whether or not to force an unlock of
        a machine that was locked by someone else.
      chromeos_root: The ChromeOS chroot to use for the autotest scripts.
      local_server: A string containing the name or ip address of the machine
        that is running an AFE server, which is to be used for managing
        machines that are not in the ChromeOS HW lab.  If falsy,
        AFELockManager.LOCAL_SERVER is used.
      use_local: A Boolean indicating whether or not to use/allow a local AFE
        server to be used (see local_server argument).
      log: If not None, this is the logger object to be used for writing out
        informational output messages.  It is expected to be an instance of
        Logger class from cros_utils/logger.py.

    Raises:
      MachineNotPingable: The local AFE server is needed but does not respond
        to ping.
    """
    self.chromeos_root = chromeos_root
    self.user = getpass.getuser()
    self.logger = log or logger.GetLogger()
    autotest_path = os.path.join(chromeos_root,
                                 'src/third_party/autotest/files')

    sys.path.append(chromeos_root)
    sys.path.append(autotest_path)
    sys.path.append(os.path.join(autotest_path, 'server', 'cros'))

    # We have to wait to do these imports until the paths above have
    # been fixed.
    # pylint: disable=import-error
    from client import setup_modules
    setup_modules.setup(
        base_path=autotest_path, root_module_name='autotest_lib')

    from dynamic_suite import frontend_wrappers

    self.afe = frontend_wrappers.RetryingAFE(
        timeout_min=30, delay_sec=10, debug=False, server='cautotest')

    self.local = use_local
    # De-duplicate the requested machines; tolerate None as "no machines".
    self.machines = list(set(remotes or []))
    self.toolchain_lab_machines = self.GetAllToolchainLabMachines()
    # No need for the local server if every requested machine is in the lab.
    if self.machines and self.AllLabMachines():
      self.local = False

    if not self.local:
      self.local_afe = None
    else:
      dargs = {}
      dargs['server'] = local_server or AFELockManager.LOCAL_SERVER
      # Make sure local server is pingable.
      error_msg = ('Local autotest server machine %s not responding to ping.' %
                   dargs['server'])
      self.CheckMachine(dargs['server'], error_msg)
      self.local_afe = frontend_wrappers.RetryingAFE(
          timeout_min=30, delay_sec=10, debug=False, **dargs)
    if not self.machines:
      self.machines = self.toolchain_lab_machines + self.GetAllNonlabMachines()
    self.force = force_option

  def AllLabMachines(self):
    """Check to see if all machines being used are HW Lab machines.

    Returns:
      True if every name in self.machines appears verbatim in
      self.toolchain_lab_machines (also True when self.machines is empty).
    """
    return all(m in self.toolchain_lab_machines for m in self.machines)

  def CheckMachine(self, machine, error_msg):
    """Verifies that machine is responding to ping.

    The machine is tried first under the given name and then with a '.cros'
    suffix appended.

    Args:
      machine: String containing the name or ip address of machine to check.
      error_msg: Message to print if ping fails.

    Raises:
      MachineNotPingable:  If machine is not responding to 'ping'
    """
    if machines.MachineIsPingable(machine, logging_level='none'):
      return
    cros_machine = machine + '.cros'
    if machines.MachineIsPingable(cros_machine, logging_level='none'):
      return
    raise MachineNotPingable(error_msg)

  def MachineIsKnown(self, machine):
    """Checks to see if either AFE server knows the given machine.

    Args:
      machine: String containing name or ip address of machine to check.

    Returns:
      Boolean indicating if the machine is in the list of known machines for
        either AFE server.
    """
    if machine in self.toolchain_lab_machines:
      return True
    # Only consult the local server when one is configured.
    return bool(self.local_afe and machine in self.GetAllNonlabMachines())

  def GetAllToolchainLabMachines(self):
    """Gets a list of all the toolchain machines in the ChromeOS HW lab.

    The machines are read from the file crosperf/default_remotes located next
    to this module.  Each non-blank line is expected to have the form
    '<label>: <machine> <machine> ...'.

    Returns:
      A list of names of the toolchain machines in the ChromeOS HW lab.

    Raises:
      ValueError: A non-blank line does not contain exactly one ':'.
    """
    machines_file = os.path.join(
        os.path.dirname(__file__), 'crosperf', 'default_remotes')
    machine_list = []
    with open(machines_file, 'r') as input_file:
      for line in input_file:
        if not line.strip():
          # Tolerate blank lines (e.g. spacing or a trailing empty line).
          continue
        _, remotes = line.split(':')
        machine_list.extend(remotes.split())
    return machine_list

  def GetAllNonlabMachines(self):
    """Gets a list of all known machines on the local AFE server.

    Returns:
      A list of the names of the machines on the local AFE server; an empty
      list if no local AFE server is in use.
    """
    if self.local_afe:
      return self.local_afe.get_hostnames()
    return []

  def PrintStatusHeader(self, is_lab_machine):
    """Prints the status header lines for machines.

    Args:
      is_lab_machine: Boolean indicating whether to print HW Lab header or
        local machine header (different spacing).
    """
    if is_lab_machine:
      print('\nMachine (Board)\t\t\t\t\tStatus')
      print('---------------\t\t\t\t\t------\n')
    else:
      print('\nMachine (Board)\t\tStatus')
      print('---------------\t\t------\n')

  def RemoveLocalMachine(self, m):
    """Removes a machine from the local AFE server.

    Does nothing if no local AFE server is in use.

    Args:
      m: The machine to remove.

    Raises:
      MissingHostInfo:  Can't find machine to be removed.
    """
    if not self.local_afe:
      return
    host_info = self.local_afe.get_hosts(hostname=m)
    if not host_info:
      raise MissingHostInfo('Cannot find/delete machine %s.' % m)
    host_info[0].delete()

  def AddLocalMachine(self, m):
    """Adds a machine to the local AFE server.

    Does nothing if no local AFE server is in use.

    Args:
      m: The machine to be added.

    Raises:
      MachineNotPingable: The machine does not respond to ping.
    """
    if not self.local_afe:
      return
    error_msg = 'Machine %s is not responding to ping.' % m
    self.CheckMachine(m, error_msg)
    self.local_afe.create_host(m)

  def AddMachinesToLocalServer(self):
    """Adds one or more machines to the local AFE server.

    Verify that the requested machines are legal to add to the local server,
    i.e. that they are not ChromeOS HW lab machines, and they are not already
    on the local server.  Call AddLocalMachine for each valid machine.

    Raises:
      DuplicateAdd: Attempt to add a machine that is already on the server.
      UpdateNonLocalMachine:  Attempt to add a ChromeOS HW lab machine.
      NoAFEServer:  No local AFE server is in use.
      UpdateServerError:  Something went wrong while attempting to add a
        machine.
    """
    for m in self.machines:
      for cros_name in [m, m + '.cros']:
        if cros_name in self.toolchain_lab_machines:
          raise UpdateNonLocalMachine('Machine %s is already in the ChromeOS HW'
                                      ' Lab.  Cannot add it to local server.' %
                                      cros_name)
      if self.local_afe is None:
        # Without this check the get_hosts call below would fail with an
        # opaque AttributeError on None.
        raise NoAFEServer('Error: Cannot add %s; no local AFE server is '
                          'available.' % m)
      host_info = self.local_afe.get_hosts(hostname=m)
      if host_info:
        raise DuplicateAdd('Machine %s is already on the local server.' % m)
      try:
        self.AddLocalMachine(m)
        self.logger.LogOutput('Successfully added %s to local server.' % m)
      except Exception as e:
        traceback.print_exc()
        raise UpdateServerError(
            'Error occurred while attempting to add %s. %s' % (m, str(e)))

  def RemoveMachinesFromLocalServer(self):
    """Removes one or more machines from the local AFE server.

    Verify that the requested machines are legal to remove from the local
    server, i.e. that they are not ChromeOS HW lab machines.  Call
    RemoveLocalMachine for each valid machine.

    Raises:
      UpdateNonLocalMachine:  Attempt to remove a ChromeOS HW lab machine.
      UpdateServerError:  Something went wrong while attempting to remove a
        machine.
    """
    for m in self.machines:
      for cros_name in [m, m + '.cros']:
        if cros_name in self.toolchain_lab_machines:
          raise UpdateNonLocalMachine(
              'Machine %s is in the ChromeOS HW Lab. '
              'This script cannot remove lab machines.' % cros_name)
      try:
        self.RemoveLocalMachine(m)
        self.logger.LogOutput('Successfully removed %s from local server.' % m)
      except Exception as e:
        traceback.print_exc()
        raise UpdateServerError('Error occurred while attempting to remove %s '
                                '(%s).' % (m, str(e)))

  def _PrintMachineState(self, name, state):
    """Prints one status line for a machine.

    Args:
      name: The machine name to display.
      state: The machine's state dictionary; must contain 'locked' and
        'board', plus 'locked_by' and 'lock_time' when locked.
    """
    if state['locked']:
      print('%s (%s)\tlocked by %s since %s' %
            (name, state['board'], state['locked_by'], state['lock_time']))
    else:
      print('%s (%s)\tunlocked' % (name, state['board']))

  def ListMachineStates(self, machine_states):
    """Gets and prints the current status for a list of machines.

    Prints out the current status for all of the machines in the current
    AFELockManager's list of machines (set when the object is initialized).
    HW Lab machines are printed first, followed by local machines, each group
    under its own header.

    Args:
      machine_states: A dictionary of the current state of every machine in
        the current AFELockManager's list of machines.  Normally obtained by
        calling AFELockManager::GetMachineStates.
    """
    local_machines = []
    printed_hdr = False
    for m in machine_states:
      cros_name = m + '.cros'
      if m in self.toolchain_lab_machines:
        name = m
      elif cros_name in self.toolchain_lab_machines:
        name = cros_name
      else:
        # Not a lab machine; defer until the lab machines are printed.
        local_machines.append(m)
        continue
      if not printed_hdr:
        self.PrintStatusHeader(True)
        printed_hdr = True
      self._PrintMachineState(name, machine_states[m])

    if local_machines:
      self.PrintStatusHeader(False)
      for m in local_machines:
        self._PrintMachineState(m, machine_states[m])

  def UpdateLockInAFE(self, should_lock_machine, machine):
    """Calls an AFE server to lock/unlock a machine.

    Toolchain lab machines are updated on the HW Lab server (using the name
    with everything from the first '.' onward removed); all other machines
    are updated on the local server.

    Args:
      should_lock_machine: Boolean indicating whether to lock the machine (True)
        or unlock the machine (False).
      machine: The machine to update.

    Raises:
      LockingError:  An error occurred while attempting to update the machine
        state.
    """
    action = 'lock' if should_lock_machine else 'unlock'
    kwargs = {
        'locked': should_lock_machine,
        'lock_reason': 'toolchain user request (%s)' % self.user,
    }

    cros_name = machine + '.cros'
    if cros_name in self.toolchain_lab_machines:
      machine = cros_name
    if machine in self.toolchain_lab_machines:
      m = machine.split('.')[0]
      afe_server = self.afe
    else:
      m = machine
      afe_server = self.local_afe

    try:
      afe_server.run('modify_hosts',
                     host_filter_data={'hostname__in': [m]},
                     update_data=kwargs)
    except Exception as e:
      traceback.print_exc()
      raise LockingError('Unable to %s machine %s. %s' % (action, m, str(e)))

  def UpdateMachines(self, lock_machines):
    """Sets the locked state of the machines to the requested value.

    The machines updated are the ones in self.machines (specified when the
    class object was initialized).

    Args:
      lock_machines: Boolean indicating whether to lock the machines (True) or
        unlock the machines (False).

    Returns:
      A list of the machines whose state was successfully updated.

    Raises:
      LockingError:  Propagated from UpdateLockInAFE if an update fails.
    """
    updated_machines = []
    for m in self.machines:
      self.UpdateLockInAFE(lock_machines, m)
      # Since we returned from self.UpdateLockInAFE we assume the request
      # succeeded.
      if lock_machines:
        self.logger.LogOutput('Locked machine(s) %s.' % m)
      else:
        self.logger.LogOutput('Unlocked machine(s) %s.' % m)
      updated_machines.append(m)

    return updated_machines

  def _InternalRemoveMachine(self, machine):
    """Remove machine from internal list of machines.

    Args:
      machine: Name of machine to be removed from internal list.
    """
    # Check to see if machine is lab machine and if so, make sure it has
    # ".cros" on the end.
    cros_machine = machine
    if machine.find('rack') > 0 and machine.find('row') > 0:
      if machine.find('.cros') == -1:
        cros_machine = cros_machine + '.cros'

    self.machines = [
        m for m in self.machines if m not in (cros_machine, machine)
    ]

  def CheckMachineLocks(self, machine_states, cmd):
    """Check that every machine in requested list is in the proper state.

    If the cmd is 'unlock' verify that every machine is locked by requestor.
    If the cmd is 'lock' verify that every machine is currently unlocked.
    Machines that are already in the requested state are dropped from
    self.machines (with a warning) so they are not updated again.

    Args:
      machine_states: A dictionary of the current state of every machine in
        the current AFELockManager's list of machines.  Normally obtained by
        calling AFELockManager::GetMachineStates.
      cmd: The user-requested action for the machines: 'lock' or 'unlock'.

    Raises:
      DontOwnLock: The lock on a requested machine is owned by someone else.
    """
    # items() (rather than the Python-2-only iteritems()) keeps this working
    # on both Python 2 and Python 3.
    for k, state in machine_states.items():
      if cmd == 'unlock':
        if not state['locked']:
          self.logger.LogWarning('Attempt to unlock already unlocked machine '
                                 '(%s).' % k)
          self._InternalRemoveMachine(k)
        elif state['locked_by'] != self.user:
          raise DontOwnLock('Attempt to unlock machine (%s) locked by someone '
                            'else (%s).' % (k, state['locked_by']))
      elif cmd == 'lock':
        if state['locked']:
          self.logger.LogWarning('Attempt to lock already locked machine (%s)' %
                                 k)
          self._InternalRemoveMachine(k)

  def HasAFEServer(self, local):
    """Verifies that the AFELockManager has appropriate AFE server.

    Args:
      local: Boolean indicating whether we are checking for the local server
        (True) or for the global server (False).

    Returns:
      A boolean indicating if the AFELockManager has the requested AFE server.
    """
    server = self.local_afe if local else self.afe
    return server is not None

  def GetMachineStates(self, cmd=''):
    """Gets the current state of all the requested machines.

    Gets the current state of all the requested machines, both from the HW lab
    server and from the local server.  Stores the data in a dictionary keyed
    by the hostname reported by the server (or by the requested name, with an
    empty state, when the machine is unknown and cmd is 'add').

    Args:
      cmd: The command for which we are getting the machine states. This is
        important because if one of the requested machines is missing we raise
        an exception, unless the requested command is 'add'.

    Returns:
      A dictionary of machine states for all the machines in the AFELockManager
      object.

    Raises:
      NoAFEServer:  Cannot find the HW Lab or local AFE server.
      AFEAccessError:  An error occurred when querying the server about a
        machine.
    """
    if not self.HasAFEServer(False):
      raise NoAFEServer('Error: Cannot connect to main AFE server.')

    if self.local and not self.HasAFEServer(True):
      raise NoAFEServer('Error: Cannot connect to local AFE server.')

    machine_list = {}
    for m in self.machines:
      host_info = None
      cros_name = m + '.cros'
      if (m in self.toolchain_lab_machines or
          cros_name in self.toolchain_lab_machines):
        # The HW Lab server is queried with the name up to the first '.'.
        mod_host = m.split('.')[0]
        host_info = self.afe.get_hosts(hostname=mod_host)
        if not host_info:
          raise AFEAccessError('Unable to get information about %s from main'
                               ' autotest server.' % m)
      else:
        if self.local_afe is None:
          # A non-lab machine was requested but the local server is not in
          # use; fail clearly instead of with an AttributeError on None.
          raise NoAFEServer('Error: %s is not a toolchain lab machine and no '
                            'local AFE server is available.' % m)
        host_info = self.local_afe.get_hosts(hostname=m)
        if not host_info and cmd != 'add':
          raise AFEAccessError('Unable to get information about %s from '
                               'local autotest server.' % m)
      if host_info:
        host_info = host_info[0]
        name = host_info.hostname
        values = {}
        values['board'] = host_info.platform if host_info.platform else '??'
        values['locked'] = host_info.locked
        if host_info.locked:
          values['locked_by'] = host_info.locked_by
          values['lock_time'] = host_info.lock_time
        else:
          values['locked_by'] = ''
          values['lock_time'] = ''
        machine_list[name] = values
      else:
        machine_list[m] = {}
    return machine_list
537
538
def Main(argv):
  """Parse the options, initialize lock manager and dispatch proper method.

  Exactly one operation (--list, --status, --lock, --unlock, --add_machine,
  --remove_machine) must be selected.  Every operation except status/list
  also requires --remote.  With --force, lock/unlock skips the check of the
  machines' current lock state/ownership but still performs the update.

  Args:
    argv: The options with which this script was invoked.

  Returns:
    0 unless an exception is raised.  Invalid options terminate the program
    through argparse's error handling (SystemExit).
  """
  parser = argparse.ArgumentParser()

  parser.add_argument(
      '--list',
      dest='cmd',
      action='store_const',
      const='status',
      help='List current status of all known machines.')
  parser.add_argument(
      '--lock',
      dest='cmd',
      action='store_const',
      const='lock',
      help='Lock given machine(s).')
  parser.add_argument(
      '--unlock',
      dest='cmd',
      action='store_const',
      const='unlock',
      help='Unlock given machine(s).')
  parser.add_argument(
      '--status',
      dest='cmd',
      action='store_const',
      const='status',
      help='List current status of given machine(s).')
  parser.add_argument(
      '--add_machine',
      dest='cmd',
      action='store_const',
      const='add',
      help='Add machine to local machine server.')
  parser.add_argument(
      '--remove_machine',
      dest='cmd',
      action='store_const',
      const='remove',
      help='Remove machine from the local machine server.')
  parser.add_argument(
      '--nolocal',
      dest='local',
      action='store_false',
      default=True,
      help='Do not try to use local machine server.')
  parser.add_argument(
      '--remote', dest='remote', help='machines on which to operate')
  parser.add_argument(
      '--chromeos_root',
      dest='chromeos_root',
      required=True,
      help='ChromeOS root to use for autotest scripts.')
  parser.add_argument(
      '--local_server',
      dest='local_server',
      default=None,
      help='Alternate local autotest server to use.')
  parser.add_argument(
      '--force',
      dest='force',
      action='store_true',
      default=False,
      help='Force lock/unlock of machines, even if not'
      ' current lock owner.')

  options = parser.parse_args(argv)

  # Report a missing operation first: it is the more fundamental problem and
  # would otherwise be masked by the "no machines" error below.
  if not options.cmd:
    parser.error('No operation selected (--list, --status, --lock, --unlock,'
                 ' --add_machine, --remove_machine).')

  if not options.remote and options.cmd != 'status':
    parser.error('No machines specified for operation.')

  if not os.path.isdir(options.chromeos_root):
    parser.error('Cannot find chromeos_root: %s.' % options.chromeos_root)

  # --remote is a single whitespace-separated string of machine names.
  machine_list = options.remote.split() if options.remote else []

  lock_manager = AFELockManager(machine_list, options.force,
                                options.chromeos_root, options.local_server,
                                options.local)

  cmd = options.cmd
  machine_states = lock_manager.GetMachineStates(cmd=cmd)

  if cmd == 'status':
    lock_manager.ListMachineStates(machine_states)

  elif cmd in ('lock', 'unlock'):
    # --force only bypasses the state/ownership check; the lock state must
    # still be updated, otherwise --force would turn the command into a no-op.
    if not lock_manager.force:
      lock_manager.CheckMachineLocks(machine_states, cmd)
    lock_manager.UpdateMachines(cmd == 'lock')

  elif cmd == 'add':
    lock_manager.AddMachinesToLocalServer()

  elif cmd == 'remove':
    lock_manager.RemoveMachinesFromLocalServer()

  return 0
655
656
if __name__ == '__main__':
  # Run the command-line entry point and use its result as the exit status.
  exit_status = Main(sys.argv[1:])
  sys.exit(exit_status)
659