1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5from distutils import version
6import cStringIO
7import HTMLParser
8import httplib
9import json
10import logging
11import multiprocessing
12import os
13import re
14import socket
15import time
16import urllib2
17import urlparse
18
19from autotest_lib.client.bin import utils as bin_utils
20from autotest_lib.client.common_lib import android_utils
21from autotest_lib.client.common_lib import error
22from autotest_lib.client.common_lib import global_config
23from autotest_lib.client.common_lib import utils
24from autotest_lib.client.common_lib.cros import retry
25from autotest_lib.server import utils as server_utils
26# TODO(cmasone): redo this class using requests module; http://crosbug.com/30107
27
28try:
29    from chromite.lib import metrics
30except ImportError:
31    metrics = utils.metrics_mock
32
33
# Shared handle on the global autotest configuration, used for the config
# lookups in this module.
CONFIG = global_config.global_config
# This file is generated at build time and specifies, per suite and per test,
# the DEPENDENCIES list specified in each control file.  It's a dict of dicts:
# {'bvt':   {'/path/to/autotest/control/site_tests/test1/control': ['dep1']}
#  'suite': {'/path/to/autotest/control/site_tests/test2/control': ['dep2']}
#  'power': {'/path/to/autotest/control/site_tests/test1/control': ['dep1'],
#            '/path/to/autotest/control/site_tests/test3/control': ['dep3']}
# }
DEPENDENCIES_FILE = 'test_suites/dependency_info'
# Number of seconds between two polls of the devserver's is_staged call while
# the caller waits for artifacts to be staged.
_ARTIFACT_STAGE_POLLING_INTERVAL = 5
# Comma-separated artifacts that should be staged when a client calls the
# devserver RPC to stage an image.
_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = 'full_payload,test_suites,stateful'
# Comma-separated artifacts that should be staged when a client calls the
# devserver RPC to stage an image with autotest artifacts.
_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST = ('full_payload,test_suites,'
                                                   'control_files,stateful,'
                                                   'autotest_packages')
# Comma-separated artifacts that should be staged when a client calls the
# devserver RPC to stage an Android build.
_BRILLO_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = ('zip_images,vendor_partitions')
# When true, DevServer.is_free_disk_ok() and
# DevServer.is_apache_client_count_ok() report success without inspecting the
# devserver load.
SKIP_DEVSERVER_HEALTH_CHECK = CONFIG.get_config_value(
        'CROS', 'skip_devserver_health_check', type=bool)
# Number of seconds for the call to get devserver load to time out.
TIMEOUT_GET_DEVSERVER_LOAD = 2.0

# Android artifact path in devserver. Backslashes in the configured value are
# removed.
ANDROID_BUILD_NAME_PATTERN = CONFIG.get_config_value(
        'CROS', 'android_build_name_pattern', type=str).replace('\\', '')

# Return value from a devserver RPC indicating the call succeeded.
SUCCESS = 'Success'

# Timeout, in minutes, for a single devserver ssh call. Also the default
# timeout of DevServer.get_devserver_load() and DevServer.devserver_healthy().
DEVSERVER_SSH_TIMEOUT_MINS = 1

# Error messages for an invalid devserver response and for a devserver that
# is down.
ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE = 'Proxy Error'
ERR_MSG_FOR_DOWN_DEVSERVER = 'Service Unavailable'

# Substring that remote_devserver_call() looks for in an exception message to
# recognize a devserver call that timed out.
ERR_MSG_FOR_TIMED_OUT_CALL = 'timeout'

# Timeout, in minutes, for waiting on a devserver staging call. Default
# timeout of remote_devserver_call().
DEVSERVER_IS_STAGING_RETRY_MIN = 100

# Timeout, in minutes, for waiting until a DUT auto-update has finished.
DEVSERVER_IS_CROS_AU_FINISHED_TIMEOUT_MIN = 100

# Total number of times the devserver triggers a CrOS auto-update.
AU_RETRY_LIMIT = 2

# Number of seconds for caller to poll devserver's get_au_status call to
# check if cros auto-update is finished.
CROS_AU_POLLING_INTERVAL = 10

# Number of seconds to wait between two retries of an auto-update call.
CROS_AU_RETRY_INTERVAL = 20

# File name template for auto-update logs; it takes two string fields.
CROS_AU_LOG_FILENAME = 'CrOS_update_%s_%s.log'
97
# Provision error patterns.
# Each entry is a (regular expression, classification label) pair.
# DevServerExceptionClassifier walks the list in order and reports the label
# of the first expression that matches the error text.
# Do NOT change these classification strings: they are used for monitoring
# provision failures, and any change may mess up the stats.
_EXCEPTION_PATTERNS = [
        # Raised when devserver portfile does not exist on host.
        (r".*Devserver portfile does not exist!.*$",
         '(1) Devserver portfile does not exist on host'),
        # Raised when devserver cannot copy packages to host.
        (r".*Could not copy .* to device.*$",
         '(2) Cannot copy packages to host'),
        # Raised when devserver fails to run specific commands on host.
        (r".*cwd=None, extra env=\{'LC_MESSAGES': 'C'\}.*$",
         '(3) Fail to run specific command on host'),
        # Raised when new build fails to boot on the host.
        # DevServerExceptionClassifier.classified_exception keys off the
        # "(4)" prefix of this label.
        (r'.*RootfsUpdateError: Build .* failed to boot on.*$',
         '(4) Build failed to boot on host'),
        # Raised when the auto-update process is timed out.
        (r'.*The CrOS auto-update process is timed out, '
         'thus will be terminated.*$',
         '(5) Auto-update is timed out'),
        # Raised when the host is not pingable.
        (r".*DeviceNotPingableError.*$",
         '(6) Host is not pingable during auto-update'),
        # Raised when hosts have unexpected status after rootfs update.
        (r'.*Update failed with unexpected update status: '
         'UPDATE_STATUS_IDLE.*$',
         '(7) Host has unexpected status: UPDATE_STATUS_IDLE after rootfs '
         'update'),
        # Raised when devserver returns non-json response to shard/drone.
        (r'.*No JSON object could be decoded.*$',
         '(8) Devserver returned non-json object'),
        # Raised when devserver loses host's ssh connection
        (r'.*SSHConnectionError\: .* port 22\: Connection timed out.*$',
         "(9) Devserver lost host's ssh connection"),
        # Raised when error happens in writing files to host
        (r'.*Write failed\: Broken pipe.*$',
         "(10) Broken pipe while writing or connecting to host")]
136
# Default for DevServer.get_available_devservers(): when true, a host outside
# any restricted subnet gets the devservers in its own subnet.
PREFER_LOCAL_DEVSERVER = CONFIG.get_config_value(
        'CROS', 'prefer_local_devserver', type=bool, default=False)

# Config switch 'enable_ssh_connection_for_devserver'; defaults to False when
# the option is not set.
ENABLE_SSH_CONNECTION_FOR_DEVSERVER = CONFIG.get_config_value(
        'CROS', 'enable_ssh_connection_for_devserver', type=bool,
        default=False)

# Directory to save auto-update logs
AUTO_UPDATE_LOG_DIR = 'autoupdate_logs'

# Default number of subnet mask bits used when looking for devservers in the
# same subnet as a DUT.
DEFAULT_SUBNET_MASKBIT = 19

# Metrics basepaths.
METRICS_PATH = 'chromeos/autotest'
PROVISION_PATH = METRICS_PATH + '/provision'
152
153
class DevServerException(Exception):
    """Error reported for a devserver call, e.g. a non-200 HTTP response."""
157
158
class BadBuildException(DevServerException):
    """Signals that a build failed to boot on the DUT."""
162
163
class RetryableProvisionException(DevServerException):
    """Signals a provision failure whose cause is retryable."""
167
class DevServerOverloadException(Exception):
    """Signals that the dev server answered with a 502 HTTP response."""
171
class DevServerFailToLocateException(Exception):
    """Signals that no devserver could be located."""
175
176
class DevServerExceptionClassifier(object):
    """Classifies an error string raised from a DUT by calling auto_update.

    The error text is matched against _EXCEPTION_PATTERNS to obtain a stable
    classification label. The class also offers a one-line summary of the
    error and the exception type that should be raised to the caller.
    """
    def __init__(self, err, keep_full_trace=True):
        """
        @param err: A single string representing one time provision
            error happened in auto_update().
        @param keep_full_trace: True to keep the whole stack trace of the
            error. False to keep only its last line.
        """
        if keep_full_trace:
            self._err = err
        else:
            # Drop trailing whitespace first: an error ending in a newline
            # would otherwise leave an empty "last line" that can never be
            # classified.
            self._err = err.rstrip().split('\n')[-1]
        self._classification = None

    def _classify(self):
        """Return the label of the first pattern that matches the error.

        @return: The classification string of the first matching entry of
            _EXCEPTION_PATTERNS, or '(0) Unknown exception' if none matches.
        """
        for err_pattern, classification in _EXCEPTION_PATTERNS:
            # re.DOTALL lets the '.*' at both ends of each pattern span line
            # breaks. Without it a trace with more than one line can never
            # match, although keeping the full trace is the default.
            if re.match(err_pattern, self._err, re.DOTALL):
                return classification

        return '(0) Unknown exception'

    @property
    def classification(self):
        """Classify the error

        @return: return a classified exception type (string) from
            _EXCEPTION_PATTERNS or '(0) Unknown exception'. Current patterns
            in _EXCEPTION_PATTERNS are very specific so that errors cannot
            match more than one pattern. The result is computed on first
            access and cached.
        """
        if not self._classification:
            self._classification = self._classify()
        return self._classification

    @property
    def summary(self):
        """Use one line to show the error message."""
        return ' '.join(self._err.splitlines())

    @property
    def classified_exception(self):
        """What kind of exception will be raised to higher.

        @return: return BadBuildException when the raised error is a
            RootfsUpdateError. Otherwise, return general DevServerException.
        """
        # The classification of RootfsUpdateError in _EXCEPTION_PATTERNS starts
        # with "(4)"
        if self.classification.startswith('(4)'):
            return BadBuildException

        return DevServerException
227
228
229class MarkupStripper(HTMLParser.HTMLParser):
230    """HTML parser that strips HTML tags, coded characters like &
231
232    Works by, basically, not doing anything for any tags, and only recording
233    the content of text nodes in an internal data structure.
234    """
235    def __init__(self):
236        self.reset()
237        self.fed = []
238
239
240    def handle_data(self, d):
241        """Consume content of text nodes, store it away."""
242        self.fed.append(d)
243
244
245    def get_data(self):
246        """Concatenate and return all stored data."""
247        return ''.join(self.fed)
248
249
def _strip_http_message(message):
    """Return the text of an HTTP message with its markup removed.

    @param message: A string returned by an HTTP call.

    @return: The text content of |message| as collected by MarkupStripper.
    """
    stripper = MarkupStripper()
    try:
        decoded = message.decode('utf_32')
        stripper.feed(decoded)
    except UnicodeDecodeError:
        # The message is not valid UTF-32; parse it as it is.
        stripper.feed(message)
    return stripper.get_data()
263
264
def _get_image_storage_server():
    """Read the 'image_storage_server' option of the CROS config section.

    @return: The configured value as a string.
    """
    storage_server = CONFIG.get_config_value(
            'CROS', 'image_storage_server', type=str)
    return storage_server
267
268
def _get_canary_channel_server():
    """Read the url of the canary-channel server from the CROS config.

    The value looks like:
    gsutil://chromeos-releases/canary-channel/<board>/<release>

    @return: The url to the canary channel server.
    """
    canary_server = CONFIG.get_config_value(
            'CROS', 'canary_channel_server', type=str)
    return canary_server
277
278
def _get_storage_server_for_artifacts(artifacts=None):
    """Pick the storage server that holds the given artifacts.

    @param artifacts: A list of artifacts we need to stage.
    @return: The canary channel server when the configured factory artifact
             is among |artifacts|; the default image storage server
             otherwise, including when no artifacts are specified.
    """
    factory_artifact = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    # Anything short of "a factory artifact is configured and requested"
    # is served from the default image storage server.
    if (not artifacts or not factory_artifact
            or factory_artifact not in artifacts):
        return _get_image_storage_server()
    return _get_canary_channel_server()
291
292
def _gs_or_local_archive_url_args(archive_url):
    """Work out the devserver call arguments for the given archive_url.

    @param archive_url: The archive url to include in the devserver RPC. This
            can either be a GS path or a local path.
    @return: A dict of arguments to include in the devserver call; empty when
            no archive_url is given.
    """
    if not archive_url:
        return {}

    if archive_url.startswith('gs://'):
        return dict(archive_url=archive_url)

    # Anything else is a local path. The devserver is told to move the files
    # while staging: that is the fastest way to stage local files, at the
    # price of deleting them from the source. This is fine because the files
    # are available on the devserver once staged.
    local_args = {}
    local_args['local_path'] = archive_url
    local_args['delete_source'] = True
    return local_args
313
314
def _reverse_lookup_from_config(address):
    """Find the hostname configured for the given IP address.

    The lookup goes through the hostname-address map from the config file
    and returns the first hostname encountered whose address equals
    |address|.

    @param address: IP address string
    @returns: hostname string, or original input if not found
    """
    matching_hostnames = (
            name for name, mapped_addr
            in _get_hostname_addr_map().iteritems()
            if mapped_addr == address)
    return next(matching_hostnames, address)
330
331
def _get_hostname_addr_map():
    """Load the HOSTNAME_ADDR_MAP config section as a dictionary.

    @return: dict mapping server hostnames to addresses
    """
    hostname_to_addr = CONFIG.get_section_as_dict('HOSTNAME_ADDR_MAP')
    return hostname_to_addr
338
339
def _get_dev_server_list():
    """Read the 'dev_server' option of the CROS config section.

    @return: The configured list, or an empty list when the option is unset.
    """
    dev_servers = CONFIG.get_config_value(
            'CROS', 'dev_server', type=list, default=[])
    return dev_servers
342
343
def _get_crash_server_list():
    """Read the 'crash_server' option of the CROS config section.

    @return: The configured list, or an empty list when the option is unset.
    """
    crash_servers = CONFIG.get_config_value(
            'CROS', 'crash_server', type=list, default=[])
    return crash_servers
347
348
def remote_devserver_call(timeout_min=DEVSERVER_IS_STAGING_RETRY_MIN,
                          exception_to_raise=DevServerException):
    """Create a decorator for functions that make remote devserver calls.

    The decorated function is run through retry.retry, configured with
    urllib2.URLError, error.CmdError and DevServerOverloadException, to avoid
    devserver flakiness. A urllib2.HTTPError raised by the function is
    replaced by a DevServerException carrying the response body with its
    markup stripped. If the exception that finally escapes mentions a
    timeout, the event is logged and counted before the very same exception
    is re-raised.

    @param timeout_min: Timeout in minutes handed to retry.retry.
    @param exception_to_raise: Exception class handed to retry.retry.

    @return: The decorator to apply to the devserver call.
    """
    #pylint: disable=C0111

    def inner_decorator(method):
        label = method.__name__ if hasattr(method, '__name__') else None

        def metrics_wrapper(*args, **kwargs):
            def wrapper():
                """This wrapper actually catches the HTTPError."""
                try:
                    return method(*args, **kwargs)
                except urllib2.HTTPError as e:
                    plain_text = _strip_http_message(e.read())
                    raise DevServerException(plain_text)

            retrying_call = retry.retry(
                    (urllib2.URLError, error.CmdError,
                     DevServerOverloadException),
                    timeout_min=timeout_min,
                    exception_to_raise=exception_to_raise,
                    label=label)(wrapper)

            try:
                return retrying_call()
            except Exception as e:
                # Only timed-out calls are reported; everything else goes
                # straight back to the caller.
                if ERR_MSG_FOR_TIMED_OUT_CALL not in str(e):
                    raise

                # The devserver is either the DevServer instance the method
                # is bound to, or the 'devserver' keyword argument.
                if args and isinstance(args[0], DevServer):
                    dev_server = args[0].hostname
                elif 'devserver' in kwargs:
                    dev_server = get_hostname(kwargs['devserver'])
                else:
                    dev_server = None

                logging.debug('RPC call %s has timed out on devserver %s.',
                              label, dev_server)
                timeout_counter = metrics.Counter(
                        'chromeos/autotest/devserver/call_timeout')
                timeout_counter.increment(fields={'dev_server': dev_server,
                                                  'healthy': label})
                raise

        return metrics_wrapper

    return inner_decorator
397
398
399def get_hostname(url):
400    """Get the hostname portion of a URL
401
402    schema://hostname:port/path
403
404    @param url: a Url string
405    @return: a hostname string
406    """
407    return urlparse.urlparse(url).hostname
408
409
def get_resolved_hostname(url):
    """Return the symbolic hostname for the given url.

    If the given `url` uses a numeric IP address, try and find a
    symbolic name from the hostname map in the config file.

    @param url  The URL with which to perform the conversion/lookup.
    @return  The hostname found in the config map, or the hostname part of
             `url` when the map has no entry for it.
    """
    url_host = get_hostname(url)
    return _reverse_lookup_from_config(url_host)
419
420
class DevServer(object):
    """Base class for all DevServer-like server stubs.

    This is the base class for interacting with all Dev Server-like servers.
    A caller should instantiate a sub-class of DevServer with:

    host = SubClassServer.resolve(build)
    server = SubClassServer(host)
    """
    # Lower bound that is_free_disk_ok() compares load[FREE_DISK] against.
    _MIN_FREE_DISK_SPACE_GB = 20
    # Upper bound that is_apache_client_count_ok() compares
    # load[APACHE_CLIENT_COUNT] against.
    _MAX_APACHE_CLIENT_COUNT = 75
    # Threshold for the CPU load percentage for a devserver to be selected.
    MAX_CPU_LOAD = 80.0
    # Threshold for the network IO, set to 80MB/s
    MAX_NETWORK_IO = 1024 * 1024 * 80
    # Names of entries in the load dictionary returned by
    # get_devserver_load(). FREE_DISK, AU_PROCESS and APACHE_CLIENT_COUNT are
    # read by the health checks of this class; the others are presumably keys
    # of the same dictionary -- TODO confirm against the devserver's
    # check_health response.
    DISK_IO = 'disk_total_bytes_per_second'
    NETWORK_IO = 'network_total_bytes_per_second'
    CPU_LOAD = 'cpu_percent'
    FREE_DISK = 'free_disk'
    AU_PROCESS = 'au_process_count'
    STAGING_THREAD_COUNT = 'staging_thread_count'
    APACHE_CLIENT_COUNT = 'apache_client_count'
443
444
445    def __init__(self, devserver):
446        self._devserver = devserver
447
448
449    def url(self):
450        """Returns the url for this devserver."""
451        return self._devserver
452
453
454    @property
455    def hostname(self):
456        """Return devserver hostname parsed from the devserver URL.
457
458        Note that this is likely parsed from the devserver URL from
459        shadow_config.ini, meaning that the "hostname" part of the
460        devserver URL is actually an IP address.
461
462        @return hostname string
463        """
464        return get_hostname(self.url())
465
466
467    @property
468    def resolved_hostname(self):
469        """Return devserver hostname, resolved from its IP address.
470
471        Unlike the hostname property, this property attempts to look up
472        the proper hostname from the devserver IP address.  If lookup
473        fails, then fall back to whatever the hostname property would
474        have returned.
475
476        @return hostname string
477        """
478        return _reverse_lookup_from_config(self.hostname)
479
480
481    @staticmethod
482    def get_server_url(url):
483        """Get the devserver url from a repo url, which includes build info.
484
485        @param url: A job repo url.
486
487        @return A devserver url, e.g., http://127.0.0.10:8080
488        """
489        res = urlparse.urlparse(url)
490        if res.netloc:
491            return res.scheme + '://' + res.netloc
492
493
494    @classmethod
495    def get_devserver_load_wrapper(cls, devserver, timeout_sec, output):
496        """A wrapper function to call get_devserver_load in parallel.
497
498        @param devserver: url of the devserver.
499        @param timeout_sec: Number of seconds before time out the devserver
500                            call.
501        @param output: An output queue to save results to.
502        """
503        load = cls.get_devserver_load(devserver, timeout_min=timeout_sec/60.0)
504        if load:
505            load['devserver'] = devserver
506        output.put(load)
507
508
509    @classmethod
510    def get_devserver_load(cls, devserver,
511                           timeout_min=DEVSERVER_SSH_TIMEOUT_MINS):
512        """Returns True if the |devserver| is healthy to stage build.
513
514        @param devserver: url of the devserver.
515        @param timeout_min: How long to wait in minutes before deciding the
516                            the devserver is not up (float).
517
518        @return: A dictionary of the devserver's load.
519
520        """
521        call = cls._build_call(devserver, 'check_health')
522        @remote_devserver_call(timeout_min=timeout_min)
523        def get_load(devserver=devserver):
524            """Inner method that makes the call."""
525            return cls.run_call(call, timeout=timeout_min*60)
526
527        try:
528            return json.load(cStringIO.StringIO(get_load(devserver=devserver)))
529        except Exception as e:
530            logging.error('Devserver call failed: "%s", timeout: %s seconds,'
531                          ' Error: %s', call, timeout_min * 60, e)
532
533
534    @classmethod
535    def is_free_disk_ok(cls, load):
536        """Check if a devserver has enough free disk.
537
538        @param load: A dict of the load of the devserver.
539
540        @return: True if the devserver has enough free disk or disk check is
541                 skipped in global config.
542
543        """
544        if SKIP_DEVSERVER_HEALTH_CHECK:
545            logging.debug('devserver health check is skipped.')
546        elif load[cls.FREE_DISK] < cls._MIN_FREE_DISK_SPACE_GB:
547            return False
548
549        return True
550
551
552    @classmethod
553    def is_apache_client_count_ok(cls, load):
554        """Check if a devserver has enough Apache connections available.
555
556        Apache server by default has maximum of 150 concurrent connections. If
557        a devserver has too many live connections, it likely indicates the
558        server is busy handling many long running download requests, e.g.,
559        downloading stateful partitions. It is better not to add more requests
560        to it.
561
562        @param load: A dict of the load of the devserver.
563
564        @return: True if the devserver has enough Apache connections available,
565                 or disk check is skipped in global config.
566
567        """
568        if SKIP_DEVSERVER_HEALTH_CHECK:
569            logging.debug('devserver health check is skipped.')
570        elif cls.APACHE_CLIENT_COUNT not in load:
571            logging.debug('Apache client count is not collected from devserver.')
572        elif (load[cls.APACHE_CLIENT_COUNT] >
573              cls._MAX_APACHE_CLIENT_COUNT):
574            return False
575
576        return True
577
578
    @classmethod
    def devserver_healthy(cls, devserver,
                          timeout_min=DEVSERVER_SSH_TIMEOUT_MINS):
        """Returns True if the |devserver| is healthy to stage build.

        The devserver load is fetched once and then checked in order: the
        load must be available, the Apache client count must not be too
        high, and there must be enough free disk space. The outcome is
        reported to the devserver_healthy counter on every path, together
        with the reason of a failure.

        @param devserver: url of the devserver.
        @param timeout_min: How long to wait in minutes before deciding the
                            the devserver is not up (float).

        @return: True if devserver is healthy. Return False otherwise.

        """
        c = metrics.Counter('chromeos/autotest/devserver/devserver_healthy')
        # |reason| and |healthy| are read by the finally clause below, so each
        # early return has to set them before leaving.
        reason = ''
        healthy = False
        load = cls.get_devserver_load(devserver, timeout_min=timeout_min)
        try:
            if not load:
                # Failed to get the load of devserver.
                reason = '(1) Failed to get load.'
                return False

            apache_ok = cls.is_apache_client_count_ok(load)
            if not apache_ok:
                reason = '(2) Apache client count too high.'
                logging.error('Devserver check_health failed. Live Apache client '
                              'count is too high: %d.',
                              load[cls.APACHE_CLIENT_COUNT])
                return False

            disk_ok = cls.is_free_disk_ok(load)
            if not disk_ok:
                reason = '(3) Disk space too low.'
                logging.error('Devserver check_health failed. Free disk space is '
                              'low. Only %dGB is available.',
                              load[cls.FREE_DISK])
            # The disk check is the last one, so its result is the verdict.
            healthy = bool(disk_ok)
            return disk_ok
        finally:
            # Runs for every return above: record the verdict and its reason.
            c.increment(fields={'dev_server': cls(devserver).resolved_hostname,
                                'healthy': healthy,
                                'reason': reason})
            # Monitor how many AU processes the devserver is currently running.
            # Nothing is reported when the count is missing or zero.
            if load is not None and load.get(DevServer.AU_PROCESS):
                c_au = metrics.Gauge(
                        'chromeos/autotest/devserver/devserver_au_count')
                c_au.set(
                    load.get(DevServer.AU_PROCESS),
                    fields={'dev_server': cls(devserver).resolved_hostname})
628
629
630    @staticmethod
631    def _build_call(host, method, **kwargs):
632        """Build a URL to |host| that calls |method|, passing |kwargs|.
633
634        Builds a URL that calls |method| on the dev server defined by |host|,
635        passing a set of key/value pairs built from the dict |kwargs|.
636
637        @param host: a string that is the host basename e.g. http://server:90.
638        @param method: the dev server method to call.
639        @param kwargs: a dict mapping arg names to arg values.
640        @return the URL string.
641        """
642        # If the archive_url is a local path, the args expected by the devserver
643        # are a little different.
644        archive_url_args = _gs_or_local_archive_url_args(
645                kwargs.pop('archive_url', None))
646        kwargs.update(archive_url_args)
647
648        argstr = '&'.join(map(lambda x: "%s=%s" % x, kwargs.iteritems()))
649        return "%(host)s/%(method)s?%(argstr)s" % dict(
650                host=host, method=method, argstr=argstr)
651
652
653    def build_call(self, method, **kwargs):
654        """Builds a devserver RPC string that is used by 'run_call()'.
655
656        @param method: remote devserver method to call.
657        """
658        return self._build_call(self._devserver, method, **kwargs)
659
660
661    @classmethod
662    def build_all_calls(cls, method, **kwargs):
663        """Builds a list of URLs that makes RPC calls on all devservers.
664
665        Build a URL that calls |method| on the dev server, passing a set
666        of key/value pairs built from the dict |kwargs|.
667
668        @param method: the dev server method to call.
669        @param kwargs: a dict mapping arg names to arg values
670
671        @return the URL string
672        """
673        calls = []
674        # Note we use cls.servers as servers is class specific.
675        for server in cls.servers():
676            if cls.devserver_healthy(server):
677                calls.append(cls._build_call(server, method, **kwargs))
678
679        return calls
680
681
682    @classmethod
683    def run_call(cls, call, readline=False, timeout=None):
684        """Invoke a given devserver call using urllib.open.
685
686        Open the URL with HTTP, and return the text of the response. Exceptions
687        may be raised as for urllib2.urlopen().
688
689        @param call: a url string that calls a method to a devserver.
690        @param readline: whether read http response line by line.
691        @param timeout: The timeout seconds for this urlopen call.
692
693        @return the results of this call.
694        """
695        if timeout is not None:
696            return utils.urlopen_socket_timeout(
697                    call, timeout=timeout).read()
698        elif readline:
699            response = urllib2.urlopen(call)
700            return [line.rstrip() for line in response]
701        else:
702            return urllib2.urlopen(call).read()
703
704
705    @staticmethod
706    def servers():
707        """Returns a list of servers that can serve as this type of server."""
708        raise NotImplementedError()
709
710
711    @classmethod
712    def get_devservers_in_same_subnet(cls, ip, mask_bits=DEFAULT_SUBNET_MASKBIT,
713                                      unrestricted_only=False):
714        """Get the devservers in the same subnet of the given ip.
715
716        @param ip: The IP address of a dut to look for devserver.
717        @param mask_bits: Number of mask bits. Default is 19.
718        @param unrestricted_only: Set to True to select from devserver in
719                unrestricted subnet only. Default is False.
720
721        @return: A list of devservers in the same subnet of the given ip.
722
723        """
724        # server from cls.servers() is a URL, e.g., http://10.1.1.10:8082, so
725        # we need a dict to return the full devserver path once the IPs are
726        # filtered in get_servers_in_same_subnet.
727        server_names = {}
728        all_devservers = []
729        devservers = (cls.get_unrestricted_devservers() if unrestricted_only
730                      else cls.servers())
731        for server in devservers:
732            server_name = get_hostname(server)
733            server_names[server_name] = server
734            all_devservers.append(server_name)
735        if not all_devservers:
736            devserver_type = 'unrestricted only' if unrestricted_only else 'all'
737            raise DevServerFailToLocateException(
738                'Fail to locate a devserver for dut %s in %s devservers'
739                % (ip, devserver_type))
740
741        devservers = utils.get_servers_in_same_subnet(ip, mask_bits,
742                                                      all_devservers)
743        return [server_names[s] for s in devservers]
744
745
746    @classmethod
747    def get_unrestricted_devservers(
748                cls, restricted_subnets=utils.RESTRICTED_SUBNETS):
749        """Get the devservers not in any restricted subnet specified in
750        restricted_subnets.
751
752        @param restricted_subnets: A list of restriected subnets.
753
754        @return: A list of devservers not in any restricted subnet.
755
756        """
757        if not restricted_subnets:
758            return cls.servers()
759
760        devservers = []
761        for server in cls.servers():
762            server_name = get_hostname(server)
763            if not utils.get_restricted_subnet(server_name, restricted_subnets):
764                devservers.append(server)
765        return devservers
766
767
768    @classmethod
769    def get_healthy_devserver(cls, build, devservers, ban_list=None):
770        """"Get a healthy devserver instance from the list of devservers.
771
772        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514).
773        @param devservers: The devserver list to be chosen out a healthy one.
774        @param ban_list: The blacklist of devservers we don't want to choose.
775                Default is None.
776
777        @return: A DevServer object of a healthy devserver. Return None if no
778                healthy devserver is found.
779
780        """
781        logging.debug('Pick one healthy devserver from %r', devservers)
782        while devservers:
783            hash_index = hash(build) % len(devservers)
784            devserver = devservers.pop(hash_index)
785            logging.debug('Check health for %s', devserver)
786            if ban_list and devserver in ban_list:
787                continue
788
789            if cls.devserver_healthy(devserver):
790                logging.debug('Pick %s', devserver)
791                return cls(devserver)
792
793
794    @classmethod
795    def get_available_devservers(cls, hostname=None,
796                                 prefer_local_devserver=PREFER_LOCAL_DEVSERVER,
797                                 restricted_subnets=utils.RESTRICTED_SUBNETS):
798        """Get devservers in the same subnet of the given hostname.
799
800        @param hostname: Hostname of a DUT to choose devserver for.
801
802        @return: A tuple of (devservers, can_retry), devservers is a list of
803                 devservers that's available for the given hostname. can_retry
804                 is a flag that indicate if caller can retry the selection of
805                 devserver if no devserver in the returned devservers can be
806                 used. For example, if hostname is in a restricted subnet,
807                 can_retry will be False.
808        """
809        logging.info('Getting devservers for host: %s',  hostname)
810        host_ip = None
811        if hostname:
812            host_ip = bin_utils.get_ip_address(hostname)
813            if not host_ip:
814                logging.error('Failed to get IP address of %s. Will pick a '
815                              'devserver without subnet constraint.', hostname)
816
817        if not host_ip:
818            return cls.get_unrestricted_devservers(restricted_subnets), False
819
820        # Go through all restricted subnet settings and check if the DUT is
821        # inside a restricted subnet. If so, only return the devservers in the
822        # restricted subnet and doesn't allow retry.
823        if host_ip and restricted_subnets:
824            subnet_ip, mask_bits = _get_subnet_for_host_ip(
825                    host_ip, restricted_subnets=restricted_subnets)
826            if subnet_ip:
827                logging.debug('The host %s (%s) is in a restricted subnet. '
828                              'Try to locate a devserver inside subnet '
829                              '%s:%d.', hostname, host_ip, subnet_ip,
830                              mask_bits)
831                devservers = cls.get_devservers_in_same_subnet(
832                        subnet_ip, mask_bits)
833                return devservers, False
834
835        # If prefer_local_devserver is set to True and the host is not in
836        # restricted subnet, pick a devserver in the same subnet if possible.
837        # Set can_retry to True so it can pick a different devserver if all
838        # devservers in the same subnet are down.
839        if prefer_local_devserver:
840            return (cls.get_devservers_in_same_subnet(
841                    host_ip, DEFAULT_SUBNET_MASKBIT, True), True)
842
843        return cls.get_unrestricted_devservers(restricted_subnets), False
844
845
846    @classmethod
847    def resolve(cls, build, hostname=None, ban_list=None):
848        """"Resolves a build to a devserver instance.
849
850        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514).
851        @param hostname: The hostname of dut that requests a devserver. It's
852                         used to make sure a devserver in the same subnet is
853                         preferred.
854        @param ban_list: The blacklist of devservers shouldn't be chosen.
855
856        @raise DevServerException: If no devserver is available.
857        """
858        tried_devservers = set()
859        devservers, can_retry = cls.get_available_devservers(hostname)
860        if devservers:
861            tried_devservers |= set(devservers)
862
863        devserver = cls.get_healthy_devserver(build, devservers,
864                                              ban_list=ban_list)
865
866        if not devserver and can_retry:
867            # Find available devservers without dut location constrain.
868            devservers, _ = cls.get_available_devservers()
869            devserver = cls.get_healthy_devserver(build, devservers,
870                                                  ban_list=ban_list)
871            if devservers:
872                tried_devservers |= set(devservers)
873        if devserver:
874            return devserver
875        else:
876            subnet = 'unrestricted subnet'
877            if hostname is not None:
878                host_ip = bin_utils.get_ip_address(hostname)
879                if host_ip:
880                    subnet_ip, mask_bits = _get_subnet_for_host_ip(host_ip)
881                    subnet = '%s/%s' % (str(subnet_ip), str(mask_bits))
882
883            error_msg = ('All devservers in subnet: %s are currently down: '
884                         '%s. (dut hostname: %s)' %
885                         (subnet, tried_devservers, hostname))
886            logging.error(error_msg)
887            c = metrics.Counter(
888                    'chromeos/autotest/devserver/subnet_without_devservers')
889            c.increment(fields={'subnet': subnet, 'hostname': str(hostname)})
890            raise DevServerException(error_msg)
891
892
893    @classmethod
894    def random(cls):
895        """Return a random devserver that's available.
896
897        Devserver election in `resolve` method is based on a hash of the
898        build that a caller wants to stage. The purpose is that different
899        callers requesting for the same build can get the same devserver,
900        while the lab is able to distribute different builds across all
901        devservers. That helps to reduce the duplication of builds across
902        all devservers.
903        This function returns a random devserver, by passing a random
904        pseudo build name to `resolve `method.
905        """
906        return cls.resolve(build=str(time.time()))
907
908
class CrashServer(DevServer):
    """Class of DevServer that symbolicates crash dumps."""

    @staticmethod
    def servers():
        """Return the crash servers given by _get_crash_server_list()."""
        return _get_crash_server_list()


    @remote_devserver_call()
    def symbolicate_dump(self, minidump_path, build):
        """Ask the devserver to symbolicate the dump at minidump_path.

        Posts the minidump to the `symbolicate_dump` call of the devserver,
        with the archive url of |build| so the devserver knows which debug
        symbols are needed.

        @param minidump_path: the on-disk path of the minidump.
        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
                      whose debug symbols are needed for symbolication.
        @return The contents of the stack trace, or an empty string if the
                `requests` module can't be imported.
        @raise DevServerException upon any return code that's not HTTP OK.
               NOTE(review): this body raises urllib2.HTTPError; presumably
               the remote_devserver_call decorator converts it - confirm.
        """
        try:
            import requests
        except ImportError:
            logging.warning("Can't 'import requests' to connect to dev server.")
            return ''
        f = {'dev_server': self.resolved_hostname}
        c = metrics.Counter('chromeos/autotest/crashserver/symbolicate_dump')
        c.increment(fields=f)
        # Symbolicate minidump.
        m = 'chromeos/autotest/crashserver/symbolicate_dump_duration'
        with metrics.SecondsTimer(m, fields=f):
            call = self.build_call('symbolicate_dump',
                                   archive_url=_get_image_storage_server() + build)
            # Close the minidump as soon as the upload is done instead of
            # leaking the file handle.
            with open(minidump_path, 'rb') as minidump:
                request = requests.post(call, files={'minidump': minidump})
            if request.status_code == requests.codes.OK:
                return request.text

        # Any other status code is reported as an HTTPError carrying the
        # response text as its body.
        error_fd = cStringIO.StringIO(request.text)
        raise urllib2.HTTPError(
                call, request.status_code, request.text, request.headers,
                error_fd)


    @classmethod
    def get_available_devservers(cls, hostname=None):
        """Get all available crash servers.

        Crash server election doesn't need to count the location of hostname.

        @param hostname: Hostname of a DUT to choose devserver for. Unused;
                         it defaults to None so the method can be called the
                         same way as DevServer.get_available_devservers.

        @return: A tuple of (all crash servers, False). can_retry is set to
                 False, as all crash servers are returned. There is no point to
                 retry.
        """
        return cls.servers(), False
967
968
969class ImageServerBase(DevServer):
970    """Base class for devservers used to stage builds.
971
972    CrOS and Android builds are staged in different ways as they have different
973    sets of artifacts. This base class abstracts the shared functions between
974    the two types of ImageServer.
975    """
976
977    @classmethod
978    def servers(cls):
979        """Returns a list of servers that can serve as a desired type of
980        devserver.
981        """
982        return _get_dev_server_list()
983
984
985    def _get_image_url(self, image):
986        """Returns the url of the directory for this image on the devserver.
987
988        @param image: the image that was fetched.
989        """
990        image = self.translate(image)
991        url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern',
992                                              type=str)
993        return (url_pattern % (self.url(), image)).replace('update', 'static')
994
995
996    @staticmethod
997    def create_metadata(server_name, image, artifacts=None, files=None):
998        """Create a metadata dictionary given the staged items.
999
1000        The metadata can be send to metadata db along with stats.
1001
1002        @param server_name: name of the devserver, e.g 172.22.33.44.
1003        @param image: The name of the image.
1004        @param artifacts: A list of artifacts.
1005        @param files: A list of files.
1006
1007        @return A metadata dictionary.
1008
1009        """
1010        metadata = {'devserver': server_name,
1011                    'image': image,
1012                    '_type': 'devserver'}
1013        if artifacts:
1014            metadata['artifacts'] = ' '.join(artifacts)
1015        if files:
1016            metadata['files'] = ' '.join(files)
1017        return metadata
1018
1019
1020    @classmethod
1021    def run_ssh_call(cls, call, readline=False, timeout=None):
1022        """Construct an ssh-based rpc call, and execute it.
1023
1024        @param call: a url string that calls a method to a devserver.
1025        @param readline: whether read http response line by line.
1026        @param timeout: The timeout seconds for ssh call.
1027
1028        @return the results of this call.
1029        """
1030        hostname = get_hostname(call)
1031        ssh_call = 'ssh %s \'curl "%s"\'' % (hostname, utils.sh_escape(call))
1032        timeout_seconds = timeout if timeout else DEVSERVER_SSH_TIMEOUT_MINS*60
1033        try:
1034            result = utils.run(ssh_call, timeout=timeout_seconds)
1035        except error.CmdError as e:
1036            logging.debug('Error occurred with exit_code %d when executing the '
1037                          'ssh call: %s.', e.result_obj.exit_status,
1038                          e.result_obj.stderr)
1039            c = metrics.Counter('chromeos/autotest/devserver/ssh_failure')
1040            c.increment(fields={'dev_server': hostname})
1041            raise
1042        response = result.stdout
1043
1044        # If the curl command's returned HTTP response contains certain
1045        # exception string, raise the DevServerException of the response.
1046        if 'DownloaderException' in response:
1047            raise DevServerException(_strip_http_message(response))
1048
1049        if readline:
1050            # Remove line terminators and trailing whitespace
1051            response = response.splitlines()
1052            return [line.rstrip() for line in response]
1053
1054        return response
1055
1056
1057    @classmethod
1058    def run_call(cls, call, readline=False, timeout=None):
1059        """Invoke a given devserver call using urllib.open or ssh.
1060
1061        Open the URL with HTTP or SSH-based HTTP, and return the text of the
1062        response. Exceptions may be raised as for urllib2.urlopen() or
1063        utils.run().
1064
1065        @param call: a url string that calls a method to a devserver.
1066        @param readline: whether read http response line by line.
1067        @param timeout: The timeout seconds for urlopen call or ssh call.
1068
1069        @return the results of this call.
1070        """
1071        server_name = get_hostname(call)
1072        is_in_restricted_subnet = utils.get_restricted_subnet(
1073                server_name, utils.RESTRICTED_SUBNETS)
1074        _EMPTY_SENTINEL_VALUE = object()
1075        def kickoff_call():
1076            """Invoke a given devserver call using urllib.open or ssh.
1077
1078            @param call: a url string that calls a method to a devserver.
1079            @param is_in_restricted_subnet: whether the devserver is in subnet.
1080            @param readline: whether read http response line by line.
1081            @param timeout: The timeout seconds for urlopen call or ssh call.
1082            """
1083            if (not ENABLE_SSH_CONNECTION_FOR_DEVSERVER or
1084                not is_in_restricted_subnet):
1085                response = super(ImageServerBase, cls).run_call(
1086                        call, readline=readline, timeout=timeout)
1087            else:
1088                response = cls.run_ssh_call(
1089                        call, readline=readline, timeout=timeout)
1090            # Retry if devserver service is temporarily down, e.g. in a
1091            # devserver push.
1092            if ERR_MSG_FOR_DOWN_DEVSERVER in response:
1093                return False
1094
1095            # Don't return response directly since it may be empty string,
1096            # which causes poll_for_condition to retry.
1097            return _EMPTY_SENTINEL_VALUE if not response else response
1098
1099        try:
1100            response = bin_utils.poll_for_condition(
1101                    kickoff_call,
1102                    exception=bin_utils.TimeoutError(),
1103                    timeout=60,
1104                    sleep_interval=5)
1105            return '' if response is _EMPTY_SENTINEL_VALUE else response
1106        except bin_utils.TimeoutError:
1107            return ERR_MSG_FOR_DOWN_DEVSERVER
1108
1109
1110    @classmethod
1111    def download_file(cls, remote_file, local_file, timeout=None):
1112        """Download file from devserver.
1113
1114        The format of remote_file should be:
1115            http://devserver_ip:8082/static/board/...
1116
1117        @param remote_file: The URL of the file on devserver that need to be
1118            downloaded.
1119        @param local_file: The path of the file saved to local.
1120        @param timeout: The timeout seconds for this call.
1121        """
1122        response = cls.run_call(remote_file, timeout=timeout)
1123        with open(local_file, 'w') as out_log:
1124            out_log.write(response)
1125
1126
1127    def _poll_is_staged(self, **kwargs):
1128        """Polling devserver.is_staged until all artifacts are staged.
1129
1130        @param kwargs: keyword arguments to make is_staged devserver call.
1131
1132        @return: True if all artifacts are staged in devserver.
1133        """
1134        call = self.build_call('is_staged', **kwargs)
1135
1136        def all_staged():
1137            """Call devserver.is_staged rpc to check if all files are staged.
1138
1139            @return: True if all artifacts are staged in devserver. False
1140                     otherwise.
1141            @rasies DevServerException, the exception is a wrapper of all
1142                    exceptions that were raised when devserver tried to download
1143                    the artifacts. devserver raises an HTTPError or a CmdError
1144                    when an exception was raised in the code. Such exception
1145                    should be re-raised here to stop the caller from waiting.
1146                    If the call to devserver failed for connection issue, a
1147                    URLError exception is raised, and caller should retry the
1148                    call to avoid such network flakiness.
1149
1150            """
1151            try:
1152                result = self.run_call(call)
1153                logging.debug('whether artifact is staged: %r', result)
1154                return result == 'True'
1155            except urllib2.HTTPError as e:
1156                error_markup = e.read()
1157                raise DevServerException(_strip_http_message(error_markup))
1158            except urllib2.URLError as e:
1159                # Could be connection issue, retry it.
1160                # For example: <urlopen error [Errno 111] Connection refused>
1161                logging.error('URLError happens in is_stage: %r', e)
1162                return False
1163            except error.CmdError as e:
1164                # Retry if SSH failed to connect to the devserver.
1165                logging.warning('CmdError happens in is_stage: %r, will retry', e)
1166                return False
1167
1168        bin_utils.poll_for_condition(
1169                all_staged,
1170                exception=bin_utils.TimeoutError(),
1171                timeout=DEVSERVER_IS_STAGING_RETRY_MIN * 60,
1172                sleep_interval=_ARTIFACT_STAGE_POLLING_INTERVAL)
1173
1174        return True
1175
1176
1177    def _call_and_wait(self, call_name, error_message,
1178                       expected_response=SUCCESS, **kwargs):
1179        """Helper method to make a urlopen call, and wait for artifacts staged.
1180
1181        @param call_name: name of devserver rpc call.
1182        @param error_message: Error message to be thrown if response does not
1183                              match expected_response.
1184        @param expected_response: Expected response from rpc, default to
1185                                  |Success|. If it's set to None, do not compare
1186                                  the actual response. Any response is consider
1187                                  to be good.
1188        @param kwargs: keyword arguments to make is_staged devserver call.
1189
1190        @return: The response from rpc.
1191        @raise DevServerException upon any return code that's expected_response.
1192
1193        """
1194        call = self.build_call(call_name, async=True, **kwargs)
1195        try:
1196            response = self.run_call(call)
1197            logging.debug('response for RPC: %r', response)
1198            if ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE in response:
1199                logging.debug('Proxy error happens in RPC call, '
1200                              'will retry in 30 seconds')
1201                time.sleep(30)
1202                raise DevServerOverloadException()
1203        except httplib.BadStatusLine as e:
1204            logging.error(e)
1205            raise DevServerException('Received Bad Status line, Devserver %s '
1206                                     'might have gone down while handling '
1207                                     'the call: %s' % (self.url(), call))
1208
1209        if expected_response and not response == expected_response:
1210                raise DevServerException(error_message)
1211
1212        # `os_type` is needed in build a devserver call, but not needed for
1213        # wait_for_artifacts_staged, since that method is implemented by
1214        # each ImageServerBase child class.
1215        if 'os_type' in kwargs:
1216            del kwargs['os_type']
1217        self.wait_for_artifacts_staged(**kwargs)
1218        return response
1219
1220
1221    def _stage_artifacts(self, build, artifacts, files, archive_url, **kwargs):
1222        """Tell the devserver to download and stage |artifacts| from |image|
1223        specified by kwargs.
1224
1225        This is the main call point for staging any specific artifacts for a
1226        given build. To see the list of artifacts one can stage see:
1227
1228        ~src/platfrom/dev/artifact_info.py.
1229
1230        This is maintained along with the actual devserver code.
1231
1232        @param artifacts: A list of artifacts.
1233        @param files: A list of files to stage.
1234        @param archive_url: Optional parameter that has the archive_url to stage
1235                this artifact from. Default is specified in autotest config +
1236                image.
1237        @param kwargs: keyword arguments that specify the build information, to
1238                make stage devserver call.
1239
1240        @raise DevServerException upon any return code that's not HTTP OK.
1241        """
1242        if not archive_url:
1243            archive_url = _get_storage_server_for_artifacts(artifacts) + build
1244
1245        artifacts_arg = ','.join(artifacts) if artifacts else ''
1246        files_arg = ','.join(files) if files else ''
1247        error_message = ("staging %s for %s failed;"
1248                         "HTTP OK not accompanied by 'Success'." %
1249                         ('artifacts=%s files=%s ' % (artifacts_arg, files_arg),
1250                          build))
1251
1252        staging_info = ('build=%s, artifacts=%s, files=%s, archive_url=%s' %
1253                        (build, artifacts, files, archive_url))
1254        logging.info('Staging artifacts on devserver %s: %s',
1255                     self.url(), staging_info)
1256        success = False
1257        try:
1258            arguments = {'archive_url': archive_url,
1259                         'artifacts': artifacts_arg,
1260                         'files': files_arg}
1261            if kwargs:
1262                arguments.update(kwargs)
1263            # TODO(akeshet): canonicalize artifacts_arg before using it as a
1264            # metric field (as it stands it is a not-very-well-controlled
1265            # string).
1266            f = {'artifacts': artifacts_arg,
1267                 'dev_server': self.resolved_hostname}
1268            with metrics.SecondsTimer(
1269                    'chromeos/autotest/devserver/stage_artifact_duration',
1270                    fields=f):
1271                self.call_and_wait(call_name='stage', error_message=error_message,
1272                                   **arguments)
1273            logging.info('Finished staging artifacts: %s', staging_info)
1274            success = True
1275        except (bin_utils.TimeoutError, error.TimeoutException):
1276            logging.error('stage_artifacts timed out: %s', staging_info)
1277            raise DevServerException(
1278                    'stage_artifacts timed out: %s' % staging_info)
1279        finally:
1280            f = {'success': success,
1281                 'artifacts': artifacts_arg,
1282                 'dev_server': self.resolved_hostname}
1283            metrics.Counter('chromeos/autotest/devserver/stage_artifact'
1284                            ).increment(fields=f)
1285
1286
1287    def call_and_wait(self, *args, **kwargs):
1288        """Helper method to make a urlopen call, and wait for artifacts staged.
1289
1290        This method needs to be overridden in the subclass to implement the
1291        logic to call _call_and_wait.
1292        """
1293        raise NotImplementedError
1294
1295
1296    def _trigger_download(self, build, artifacts, files, synchronous=True,
1297                          **kwargs_build_info):
1298        """Tell the devserver to download and stage image specified in
1299        kwargs_build_info.
1300
1301        Tells the devserver to fetch |image| from the image storage server
1302        named by _get_image_storage_server().
1303
1304        If |synchronous| is True, waits for the entire download to finish
1305        staging before returning. Otherwise only the artifacts necessary
1306        to start installing images onto DUT's will be staged before returning.
1307        A caller can then call finish_download to guarantee the rest of the
1308        artifacts have finished staging.
1309
1310        @param synchronous: if True, waits until all components of the image are
1311               staged before returning.
1312        @param kwargs_build_info: Dictionary of build information.
1313                For CrOS, it is None as build is the CrOS image name.
1314                For Android, it is {'target': target,
1315                                    'build_id': build_id,
1316                                    'branch': branch}
1317
1318        @raise DevServerException upon any return code that's not HTTP OK.
1319
1320        """
1321        if kwargs_build_info:
1322            archive_url = None
1323        else:
1324            archive_url = _get_image_storage_server() + build
1325        error_message = ("trigger_download for %s failed;"
1326                         "HTTP OK not accompanied by 'Success'." % build)
1327        kwargs = {'archive_url': archive_url,
1328                  'artifacts': artifacts,
1329                  'files': files,
1330                  'error_message': error_message}
1331        if kwargs_build_info:
1332            kwargs.update(kwargs_build_info)
1333
1334        logging.info('trigger_download starts for %s', build)
1335        try:
1336            response = self.call_and_wait(call_name='stage', **kwargs)
1337            logging.info('trigger_download finishes for %s', build)
1338        except (bin_utils.TimeoutError, error.TimeoutException):
1339            logging.error('trigger_download timed out for %s.', build)
1340            raise DevServerException(
1341                    'trigger_download timed out for %s.' % build)
1342        was_successful = response == SUCCESS
1343        if was_successful and synchronous:
1344            self._finish_download(build, artifacts, files, **kwargs_build_info)
1345
1346
1347    def _finish_download(self, build, artifacts, files, **kwargs_build_info):
1348        """Tell the devserver to finish staging image specified in
1349        kwargs_build_info.
1350
1351        If trigger_download is called with synchronous=False, it will return
1352        before all artifacts have been staged. This method contacts the
1353        devserver and blocks until all staging is completed and should be
1354        called after a call to trigger_download.
1355
1356        @param kwargs_build_info: Dictionary of build information.
1357                For CrOS, it is None as build is the CrOS image name.
1358                For Android, it is {'target': target,
1359                                    'build_id': build_id,
1360                                    'branch': branch}
1361
1362        @raise DevServerException upon any return code that's not HTTP OK.
1363        """
1364        archive_url = _get_image_storage_server() + build
1365        error_message = ("finish_download for %s failed;"
1366                         "HTTP OK not accompanied by 'Success'." % build)
1367        kwargs = {'archive_url': archive_url,
1368                  'artifacts': artifacts,
1369                  'files': files,
1370                  'error_message': error_message}
1371        if kwargs_build_info:
1372            kwargs.update(kwargs_build_info)
1373        try:
1374            self.call_and_wait(call_name='stage', **kwargs)
1375        except (bin_utils.TimeoutError, error.TimeoutException):
1376            logging.error('finish_download timed out for %s', build)
1377            raise DevServerException(
1378                    'finish_download timed out for %s.' % build)
1379
1380
1381    @remote_devserver_call()
1382    def locate_file(self, file_name, artifacts, build, build_info):
1383        """Locate a file with the given file_name on devserver.
1384
1385        This method calls devserver RPC `locate_file` to look up a file with
1386        the given file name inside specified build artifacts.
1387
1388        @param file_name: Name of the file to look for a file.
1389        @param artifacts: A list of artifact names to search for the file.
1390        @param build: Name of the build. For Android, it's None as build_info
1391                should be used.
1392        @param build_info: Dictionary of build information.
1393                For CrOS, it is None as build is the CrOS image name.
1394                For Android, it is {'target': target,
1395                                    'build_id': build_id,
1396                                    'branch': branch}
1397
1398        @return: A devserver url to the file.
1399        @raise DevServerException upon any return code that's not HTTP OK.
1400        """
1401        if not build and not build_info:
1402            raise DevServerException('You must specify build information to '
1403                                     'look for file %s in artifacts %s.' %
1404                                     (file_name, artifacts))
1405        kwargs = {'file_name': file_name,
1406                  'artifacts': artifacts}
1407        if build_info:
1408            build_path = '%(branch)s/%(target)s/%(build_id)s' % build_info
1409            kwargs.update(build_info)
1410            # Devserver treats Android and Brillo build in the same way as they
1411            # are both retrieved from Launch Control and have similar build
1412            # artifacts. Therefore, os_type for devserver calls is `android` for
1413            # both Android and Brillo builds.
1414            kwargs['os_type'] = 'android'
1415        else:
1416            build_path = build
1417            kwargs['build'] = build
1418        call = self.build_call('locate_file', async=False, **kwargs)
1419        try:
1420            file_path = self.run_call(call)
1421            return os.path.join(self.url(), 'static', build_path, file_path)
1422        except httplib.BadStatusLine as e:
1423            logging.error(e)
1424            raise DevServerException('Received Bad Status line, Devserver %s '
1425                                     'might have gone down while handling '
1426                                     'the call: %s' % (self.url(), call))
1427
1428
1429    @remote_devserver_call()
1430    def list_control_files(self, build, suite_name=''):
1431        """Ask the devserver to list all control files for |build|.
1432
1433        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1434                      whose control files the caller wants listed.
1435        @param suite_name: The name of the suite for which we require control
1436                           files.
1437        @return None on failure, or a list of control file paths
1438                (e.g. server/site_tests/autoupdate/control)
1439        @raise DevServerException upon any return code that's not HTTP OK.
1440        """
1441        build = self.translate(build)
1442        call = self.build_call('controlfiles', build=build,
1443                               suite_name=suite_name)
1444        return self.run_call(call, readline=True)
1445
1446
1447    @remote_devserver_call()
1448    def get_control_file(self, build, control_path):
1449        """Ask the devserver for the contents of a control file.
1450
1451        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1452                      whose control file the caller wants to fetch.
1453        @param control_path: The file to fetch
1454                             (e.g. server/site_tests/autoupdate/control)
1455        @return The contents of the desired file.
1456        @raise DevServerException upon any return code that's not HTTP OK.
1457        """
1458        build = self.translate(build)
1459        call = self.build_call('controlfiles', build=build,
1460                               control_path=control_path)
1461        return self.run_call(call)
1462
1463
1464    @remote_devserver_call()
1465    def list_suite_controls(self, build, suite_name=''):
1466        """Ask the devserver to list contents of all control files for |build|.
1467
1468        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1469                      whose control files' contents the caller wants returned.
1470        @param suite_name: The name of the suite for which we require control
1471                           files.
1472        @return None on failure, or a dict of contents of all control files
1473            (e.g. {'path1': "#Copyright controls ***", ...,
1474                pathX': "#Copyright controls ***"}
1475        @raise DevServerException upon any return code that's not HTTP OK.
1476        """
1477        build = self.translate(build)
1478        call = self.build_call('list_suite_controls', build=build,
1479                               suite_name=suite_name)
1480        return json.load(cStringIO.StringIO(self.run_call(call)))
1481
1482
1483class ImageServer(ImageServerBase):
1484    """Class for DevServer that handles RPCs related to CrOS images.
1485
1486    The calls to devserver to stage artifacts, including stage and download, are
1487    made in async mode. That is, when caller makes an RPC |stage| to request
1488    devserver to stage certain artifacts, devserver handles the call and starts
1489    staging artifacts in a new thread, and return |Success| without waiting for
1490    staging being completed. When caller receives message |Success|, it polls
1491    devserver's is_staged call until all artifacts are staged.
1492    Such mechanism is designed to prevent cherrypy threads in devserver being
1493    running out, as staging artifacts might take long time, and cherrypy starts
1494    with a fixed number of threads that handle devserver rpc.
1495    """
1496
1497    class ArtifactUrls(object):
1498        """A container for URLs of staged artifacts.
1499
1500        Attributes:
1501            full_payload: URL for downloading a staged full release update
1502            mton_payload: URL for downloading a staged M-to-N release update
1503            nton_payload: URL for downloading a staged N-to-N release update
1504
1505        """
1506        def __init__(self, full_payload=None, mton_payload=None,
1507                     nton_payload=None):
1508            self.full_payload = full_payload
1509            self.mton_payload = mton_payload
1510            self.nton_payload = nton_payload
1511
1512
1513    def wait_for_artifacts_staged(self, archive_url, artifacts='', files=''):
1514        """Polling devserver.is_staged until all artifacts are staged.
1515
1516        @param archive_url: Google Storage URL for the build.
1517        @param artifacts: Comma separated list of artifacts to download.
1518        @param files: Comma separated list of files to download.
1519        @return: True if all artifacts are staged in devserver.
1520        """
1521        kwargs = {'archive_url': archive_url,
1522                  'artifacts': artifacts,
1523                  'files': files}
1524        return self._poll_is_staged(**kwargs)
1525
1526
1527    @remote_devserver_call()
1528    def call_and_wait(self, call_name, archive_url, artifacts, files,
1529                      error_message, expected_response=SUCCESS):
1530        """Helper method to make a urlopen call, and wait for artifacts staged.
1531
1532        @param call_name: name of devserver rpc call.
1533        @param archive_url: Google Storage URL for the build..
1534        @param artifacts: Comma separated list of artifacts to download.
1535        @param files: Comma separated list of files to download.
1536        @param expected_response: Expected response from rpc, default to
1537                                  |Success|. If it's set to None, do not compare
1538                                  the actual response. Any response is consider
1539                                  to be good.
1540        @param error_message: Error message to be thrown if response does not
1541                              match expected_response.
1542
1543        @return: The response from rpc.
1544        @raise DevServerException upon any return code that's expected_response.
1545
1546        """
1547        kwargs = {'archive_url': archive_url,
1548                  'artifacts': artifacts,
1549                  'files': files}
1550        return self._call_and_wait(call_name, error_message,
1551                                   expected_response, **kwargs)
1552
1553
1554    @remote_devserver_call()
1555    def stage_artifacts(self, image=None, artifacts=None, files='',
1556                        archive_url=None):
1557        """Tell the devserver to download and stage |artifacts| from |image|.
1558
1559         This is the main call point for staging any specific artifacts for a
1560        given build. To see the list of artifacts one can stage see:
1561
1562        ~src/platfrom/dev/artifact_info.py.
1563
1564        This is maintained along with the actual devserver code.
1565
1566        @param image: the image to fetch and stage.
1567        @param artifacts: A list of artifacts.
1568        @param files: A list of files to stage.
1569        @param archive_url: Optional parameter that has the archive_url to stage
1570                this artifact from. Default is specified in autotest config +
1571                image.
1572
1573        @raise DevServerException upon any return code that's not HTTP OK.
1574        """
1575        if not artifacts and not files:
1576            raise DevServerException('Must specify something to stage.')
1577        image = self.translate(image)
1578        self._stage_artifacts(image, artifacts, files, archive_url)
1579
1580
1581    @remote_devserver_call(timeout_min=DEVSERVER_SSH_TIMEOUT_MINS)
1582    def list_image_dir(self, image):
1583        """List the contents of the image stage directory, on the devserver.
1584
1585        @param image: The image name, eg: <board>-<branch>/<Milestone>-<build>.
1586
1587        @raise DevServerException upon any return code that's not HTTP OK.
1588        """
1589        image = self.translate(image)
1590        logging.info('Requesting contents from devserver %s for image %s',
1591                     self.url(), image)
1592        archive_url = _get_storage_server_for_artifacts() + image
1593        call = self.build_call('list_image_dir', archive_url=archive_url)
1594        response = self.run_call(call, readline=True)
1595        for line in response:
1596            logging.info(line)
1597
1598
1599    def trigger_download(self, image, synchronous=True):
1600        """Tell the devserver to download and stage |image|.
1601
1602        Tells the devserver to fetch |image| from the image storage server
1603        named by _get_image_storage_server().
1604
1605        If |synchronous| is True, waits for the entire download to finish
1606        staging before returning. Otherwise only the artifacts necessary
1607        to start installing images onto DUT's will be staged before returning.
1608        A caller can then call finish_download to guarantee the rest of the
1609        artifacts have finished staging.
1610
1611        @param image: the image to fetch and stage.
1612        @param synchronous: if True, waits until all components of the image are
1613               staged before returning.
1614
1615        @raise DevServerException upon any return code that's not HTTP OK.
1616
1617        """
1618        image = self.translate(image)
1619        artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE
1620        self._trigger_download(image, artifacts, files='',
1621                               synchronous=synchronous)
1622
1623
1624    @remote_devserver_call()
1625    def setup_telemetry(self, build):
1626        """Tell the devserver to setup telemetry for this build.
1627
1628        The devserver will stage autotest and then extract the required files
1629        for telemetry.
1630
1631        @param build: the build to setup telemetry for.
1632
1633        @returns path on the devserver that telemetry is installed to.
1634        """
1635        build = self.translate(build)
1636        archive_url = _get_image_storage_server() + build
1637        call = self.build_call('setup_telemetry', archive_url=archive_url)
1638        try:
1639            response = self.run_call(call)
1640        except httplib.BadStatusLine as e:
1641            logging.error(e)
1642            raise DevServerException('Received Bad Status line, Devserver %s '
1643                                     'might have gone down while handling '
1644                                     'the call: %s' % (self.url(), call))
1645        return response
1646
1647
1648    def finish_download(self, image):
1649        """Tell the devserver to finish staging |image|.
1650
1651        If trigger_download is called with synchronous=False, it will return
1652        before all artifacts have been staged. This method contacts the
1653        devserver and blocks until all staging is completed and should be
1654        called after a call to trigger_download.
1655
1656        @param image: the image to fetch and stage.
1657        @raise DevServerException upon any return code that's not HTTP OK.
1658        """
1659        image = self.translate(image)
1660        artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST
1661        self._finish_download(image, artifacts, files='')
1662
1663
1664    def get_update_url(self, image):
1665        """Returns the url that should be passed to the updater.
1666
1667        @param image: the image that was fetched.
1668        """
1669        image = self.translate(image)
1670        url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern',
1671                                              type=str)
1672        return (url_pattern % (self.url(), image))
1673
1674
1675    def get_staged_file_url(self, filename, image):
1676        """Returns the url of a staged file for this image on the devserver."""
1677        return '/'.join([self._get_image_url(image), filename])
1678
1679
1680    def get_full_payload_url(self, image):
1681        """Returns a URL to a staged full payload.
1682
1683        @param image: the image that was fetched.
1684
1685        @return A fully qualified URL that can be used for downloading the
1686                payload.
1687
1688        """
1689        return self._get_image_url(image) + '/update.gz'
1690
1691
1692    def get_test_image_url(self, image):
1693        """Returns a URL to a staged test image.
1694
1695        @param image: the image that was fetched.
1696
1697        @return A fully qualified URL that can be used for downloading the
1698                image.
1699
1700        """
1701        return self._get_image_url(image) + '/chromiumos_test_image.bin'
1702
1703
1704    def get_recovery_image_url(self, image):
1705        """Returns a URL to a staged recovery image.
1706
1707        @param image: the image that was fetched.
1708
1709        @return A fully qualified URL that can be used for downloading the
1710                image.
1711
1712        """
1713        return self._get_image_url(image) + '/recovery_image.bin'
1714
1715
1716    @remote_devserver_call()
1717    def get_dependencies_file(self, build):
1718        """Ask the dev server for the contents of the suite dependencies file.
1719
1720        Ask the dev server at |self._dev_server| for the contents of the
1721        pre-processed suite dependencies file (at DEPENDENCIES_FILE)
1722        for |build|.
1723
1724        @param build: The build (e.g. x86-mario-release/R21-2333.0.0)
1725                      whose dependencies the caller is interested in.
1726        @return The contents of the dependencies file, which should eval to
1727                a dict of dicts, as per bin_utils/suite_preprocessor.py.
1728        @raise DevServerException upon any return code that's not HTTP OK.
1729        """
1730        build = self.translate(build)
1731        call = self.build_call('controlfiles',
1732                               build=build, control_path=DEPENDENCIES_FILE)
1733        return self.run_call(call)
1734
1735
1736    @remote_devserver_call()
1737    def get_latest_build_in_gs(self, board):
1738        """Ask the devservers for the latest offical build in Google Storage.
1739
1740        @param board: The board for who we want the latest official build.
1741        @return A string of the returned build rambi-release/R37-5868.0.0
1742        @raise DevServerException upon any return code that's not HTTP OK.
1743        """
1744        call = self.build_call(
1745                'xbuddy_translate/remote/%s/latest-official' % board,
1746                image_dir=_get_image_storage_server())
1747        image_name = self.run_call(call)
1748        return os.path.dirname(image_name)
1749
1750
1751    def translate(self, build_name):
1752        """Translate the build name if it's in LATEST format.
1753
1754        If the build name is in the format [builder]/LATEST, return the latest
1755        build in Google Storage otherwise return the build name as is.
1756
1757        @param build_name: build_name to check.
1758
1759        @return The actual build name to use.
1760        """
1761        match = re.match(r'([\w-]+)-(\w+)/LATEST', build_name, re.I)
1762        if not match:
1763            return build_name
1764        translated_build = self.get_latest_build_in_gs(match.groups()[0])
1765        logging.debug('Translated relative build %s to %s', build_name,
1766                      translated_build)
1767        return translated_build
1768
1769
1770    @classmethod
1771    @remote_devserver_call()
1772    def get_latest_build(cls, target, milestone=''):
1773        """Ask all the devservers for the latest build for a given target.
1774
1775        @param target: The build target, typically a combination of the board
1776                       and the type of build e.g. x86-mario-release.
1777        @param milestone:  For latest build set to '', for builds only in a
1778                           specific milestone set to a str of format Rxx
1779                           (e.g. R16). Default: ''. Since we are dealing with a
1780                           webserver sending an empty string, '', ensures that
1781                           the variable in the URL is ignored as if it was set
1782                           to None.
1783        @return A string of the returned build e.g. R20-2226.0.0.
1784        @raise DevServerException upon any return code that's not HTTP OK.
1785        """
1786        calls = cls.build_all_calls('latestbuild', target=target,
1787                                    milestone=milestone)
1788        latest_builds = []
1789        for call in calls:
1790            latest_builds.append(cls.run_call(call))
1791
1792        return max(latest_builds, key=version.LooseVersion)
1793
1794
1795    @remote_devserver_call()
1796    def _kill_au_process_for_host(self, **kwargs):
1797        """Kill the triggerred auto_update process if error happens in cros_au.
1798
1799        @param kwargs: Arguments to make kill_au_proc devserver call.
1800        """
1801        call = self.build_call('kill_au_proc', **kwargs)
1802        response = self.run_call(call)
1803        if not response == 'True':
1804            raise DevServerException(
1805                    'Failed to kill the triggerred CrOS auto_update process'
1806                    'on devserver %s, the response is %s' % (
1807                            self.url(), response))
1808
1809
1810    def kill_au_process_for_host(self, host_name, pid):
1811        """Kill the triggerred auto_update process if error happens.
1812
1813        Usually this function is used to clear all potential left au processes
1814        of the given host name.
1815
1816        If pid is specified, the devserver will further check the given pid to
1817        make sure the process is killed. This is used for the case that the au
1818        process has started in background, but then provision fails due to
1819        some unknown issues very fast. In this case, when 'kill_au_proc' is
1820        called, there's no corresponding background track log created for this
1821        ongoing au process, which prevents this RPC call from killing this au
1822        process.
1823
1824        @param host_name: The DUT's hostname.
1825        @param pid: The ongoing au process's pid.
1826
1827        @return: True if successfully kill the auto-update process for host.
1828        """
1829        kwargs = {'host_name': host_name, 'pid': pid}
1830        try:
1831            self._kill_au_process_for_host(**kwargs)
1832        except DevServerException:
1833            return False
1834
1835        return True
1836
1837
1838    @remote_devserver_call()
1839    def _clean_track_log(self, **kwargs):
1840        """Clean track log for the current auto-update process."""
1841        call = self.build_call('handler_cleanup', **kwargs)
1842        self.run_call(call)
1843
1844
1845    def clean_track_log(self, host_name, pid):
1846        """Clean track log for the current auto-update process.
1847
1848        @param host_name: The host name to be updated.
1849        @param pid: The auto-update process id.
1850
1851        @return: True if track log is successfully cleaned, False otherwise.
1852        """
1853        if not pid:
1854            return False
1855
1856        kwargs = {'host_name': host_name, 'pid': pid}
1857        try:
1858            self._clean_track_log(**kwargs)
1859        except DevServerException as e:
1860            logging.debug('Failed to clean track_status_file on '
1861                          'devserver for host %s and process id %s: %s',
1862                          host_name, pid, str(e))
1863            return False
1864
1865        return True
1866
1867
1868    def _get_au_log_filename(self, log_dir, host_name, pid):
1869        """Return the auto-update log's filename."""
1870        return os.path.join(log_dir, CROS_AU_LOG_FILENAME % (
1871                    host_name, pid))
1872
1873    def _read_json_response_from_devserver(self, response):
1874        """Reads the json response from the devserver.
1875
1876        This is extracted to its own function so that it can be easily mocked.
1877        @param response: the response for a devserver.
1878        """
1879        try:
1880            return json.loads(response)
1881        except ValueError as e:
1882            logging.debug('Failed to load json response: %s', response)
1883            raise DevServerException(e)
1884
1885
1886    @remote_devserver_call()
1887    def _collect_au_log(self, log_dir, **kwargs):
1888        """Collect logs from devserver after cros-update process is finished.
1889
1890        Collect the logs that recording the whole cros-update process, and
1891        write it to sysinfo path of a job.
1892
1893        The example log file name that is stored is like:
1894            '1220-repair/sysinfo/CrOS_update_host_name_pid.log'
1895
1896        @param host_name: the DUT's hostname.
1897        @param pid: the auto-update process id on devserver.
1898        @param log_dir: The directory to save the cros-update process log
1899                        retrieved from devserver.
1900        """
1901        call = self.build_call('collect_cros_au_log', **kwargs)
1902        response = self.run_call(call)
1903        if not os.path.exists(log_dir):
1904            os.mkdir(log_dir)
1905        write_file = self._get_au_log_filename(
1906                log_dir, kwargs['host_name'], kwargs['pid'])
1907        logging.debug('Saving auto-update logs into %s', write_file)
1908
1909        au_logs = self._read_json_response_from_devserver(response)
1910
1911        try:
1912            for k, v in au_logs['host_logs'].items():
1913                log_name = '%s_%s_%s' % (k, kwargs['host_name'], kwargs['pid'])
1914                log_path = os.path.join(log_dir, log_name)
1915                with open(log_path, 'w') as out_log:
1916                    out_log.write(v)
1917        except IOError as e:
1918            raise DevServerException('Failed to write auto-update hostlogs: '
1919                                     '%s' % e)
1920
1921        try:
1922            with open(write_file, 'w') as out_log:
1923                out_log.write(au_logs['cros_au_log'])
1924        except:
1925            raise DevServerException('Failed to write auto-update logs into '
1926                                     '%s' % write_file)
1927
1928
1929    def collect_au_log(self, host_name, pid, log_dir):
1930        """Collect logs from devserver after cros-update process is finished.
1931
1932        @param host_name: the DUT's hostname.
1933        @param pid: the auto-update process id on devserver.
1934        @param log_dir: The directory to save the cros-update process log
1935                        retrieved from devserver.
1936
1937        @return: True if auto-update log is successfully collected, False
1938          otherwise.
1939        """
1940        if not pid:
1941            return False
1942
1943        kwargs = {'host_name': host_name, 'pid': pid}
1944        try:
1945            self._collect_au_log(log_dir, **kwargs)
1946        except DevServerException as e:
1947            logging.debug('Failed to collect auto-update log on '
1948                          'devserver for host %s and process id %s: %s',
1949                          host_name, pid, str(e))
1950            return False
1951
1952        return True
1953
1954
1955    @remote_devserver_call()
1956    def _trigger_auto_update(self, **kwargs):
1957        """Trigger auto-update by calling devserver.cros_au.
1958
1959        @param kwargs:  Arguments to make cros_au devserver call.
1960
1961        @return: a tuple indicates whether the RPC call cros_au succeeds and
1962          the auto-update process id running on devserver.
1963        """
1964        host_name = kwargs['host_name']
1965        call = self.build_call('cros_au', async=True, **kwargs)
1966        try:
1967            response = self.run_call(call)
1968            logging.info(
1969                'Received response from devserver for cros_au call: %r',
1970                response)
1971        except httplib.BadStatusLine as e:
1972            logging.error(e)
1973            raise DevServerException('Received Bad Status line, Devserver %s '
1974                                     'might have gone down while handling '
1975                                     'the call: %s' % (self.url(), call))
1976
1977        return response
1978
1979
    def _check_for_auto_update_finished(self, pid, wait=True, **kwargs):
        """Polling devserver.get_au_status to get current auto-update status.

        The current auto-update status is used to identify whether the update
        process is finished.

        With wait=True the status is polled every CROS_AU_POLLING_INTERVAL
        seconds for up to DEVSERVER_IS_CROS_AU_FINISHED_TIMEOUT_MIN minutes
        through bin_utils.poll_for_condition(); with wait=False the status is
        queried exactly once.

        @param pid:    The background process id for auto-update in devserver.
        @param wait:   Should the check wait for completion.
        @param kwargs: keyword arguments to make get_au_status devserver call.
                       'pid' is added to this dict before the call is built.

        @return: True if auto-update is finished for a given dut. With
                 wait=False, False when it is not finished yet.
        @raise DevServerException if the devserver reports an error or
                returns a status that cannot be decoded.
        """
        logging.debug('Check the progress for auto-update process %r', pid)
        kwargs['pid'] = pid
        call = self.build_call('get_au_status', **kwargs)

        def all_finished():
            """Call devserver.get_au_status rpc to check if auto-update
               is finished.

            @return: True if auto-update is finished for a given dut. False
                     otherwise.
            @raises  DevServerException, the exception is a wrapper of all
                     exceptions that were raised when devserver tried to
                     download the artifacts. devserver raises an HTTPError or
                     a CmdError when an exception was raised in the code. Such
                     exception should be re-raised here to stop the caller from
                     waiting. If the call to devserver failed for connection
                     issue, a URLError exception is raised, and caller should
                     retry the call to avoid such network flakiness.

            """
            try:
                au_status = self.run_call(call)
                response = json.loads(au_status)
                # This is a temp fix to fit both dict and tuple returning
                # values. The dict check will be removed after a corresponding
                # devserver CL is deployed.
                if isinstance(response, dict):
                    # A reported error aborts the wait instead of being
                    # treated as "not finished yet".
                    if response.get('detailed_error_msg'):
                        raise DevServerException(
                                response.get('detailed_error_msg'))

                    if response.get('finished'):
                        logging.debug('CrOS auto-update is finished')
                        return True
                    else:
                        logging.debug('Current CrOS auto-update status: %s',
                                      response.get('status'))
                        return False

                # Sequence form: element 0 is the finished flag, element 1
                # the current status.
                if not response[0]:
                    logging.debug('Current CrOS auto-update status: %s',
                                  response[1])
                    return False
                else:
                    logging.debug('CrOS auto-update is finished')
                    return True
            # HTTPError is a subclass of URLError, so this handler has to
            # come before the URLError one.
            except urllib2.HTTPError as e:
                error_markup = e.read()
                raise DevServerException(_strip_http_message(error_markup))
            except urllib2.URLError as e:
                # Could be connection issue, retry it.
                # For example: <urlopen error [Errno 111] Connection refused>
                logging.warning('URLError (%r): Retrying connection to '
                                'devserver to check auto-update status.', e)
                return False
            except error.CmdError:
                # Retry if SSH failed to connect to the devserver.
                logging.warning('CmdError: Retrying SSH connection to check '
                                'auto-update status.')
                return False
            except socket.error as e:
                # Could be some temporary devserver connection issues.
                logging.warning('Socket Error (%r): Retrying connection to '
                                'devserver to check auto-update status.', e)
                return False
            except ValueError as e:
                # Reached when json.loads() rejects the status text.
                # NOTE(review): if run_call() itself raised ValueError,
                # au_status would be unbound here - confirm it cannot.
                raise DevServerException(
                        '%s (Got AU status: %r)' % (str(e), au_status))

        if wait:
            # Presumably raises the given bin_utils.TimeoutError once the
            # timeout elapses - confirm against poll_for_condition.
            bin_utils.poll_for_condition(
                    all_finished,
                    exception=bin_utils.TimeoutError(),
                    timeout=DEVSERVER_IS_CROS_AU_FINISHED_TIMEOUT_MIN * 60,
                    sleep_interval=CROS_AU_POLLING_INTERVAL)

            return True
        else:
            return all_finished()
2071
2072
2073    def check_for_auto_update_finished(self, response, wait=True, **kwargs):
2074        """Processing response of 'cros_au' and polling for auto-update status.
2075
2076        Will wait for the whole auto-update process is finished.
2077
2078        @param response: The response from RPC 'cros_au'
2079        @param kwargs: keyword arguments to make get_au_status devserver call.
2080
2081        @return: a tuple includes two elements.
2082          finished: True if the operation has completed.
2083          raised_error: None if everything works well or the raised error.
2084          pid: the auto-update process id on devserver.
2085        """
2086
2087        pid = 0
2088        raised_error = None
2089        finished = False
2090        try:
2091            response = json.loads(response)
2092            if response[0]:
2093                pid = response[1]
2094                # If provision is kicked off asynchronously, pid will be -1.
2095                # If provision is not successfully kicked off , pid continues
2096                # to be 0.
2097                if pid > 0:
2098                    logging.debug('start process %r for auto_update in '
2099                                  'devserver', pid)
2100                    finished = self._check_for_auto_update_finished(
2101                            pid, wait=wait, **kwargs)
2102        except Exception as e:
2103            logging.debug('Failed to trigger auto-update process on devserver')
2104            finished = True
2105            raised_error = e
2106        finally:
2107            return finished, raised_error, pid
2108
2109
2110    def _check_error_message(self, error_patterns_to_check, error_msg):
2111        """Detect whether specific error pattern exist in error message.
2112
2113        @param error_patterns_to_check: the error patterns to check
2114        @param error_msg: the error message which may include any error
2115                          pattern.
2116
2117        @return A boolean variable, True if error_msg contains any error
2118            pattern in error_patterns_to_check, False otherwise.
2119        """
2120        for err in error_patterns_to_check:
2121            if err in error_msg:
2122                return True
2123
2124        return False
2125
2126
2127    def _is_retryable(self, error_msg):
2128        """Detect whether we will retry auto-update based on error_msg.
2129
2130        @param error_msg: The given error message.
2131
2132        @return A boolean variable which indicates whether we will retry
2133            auto_update with another devserver based on the given error_msg.
2134        """
2135        # For now we just hard-code the error message we think it's suspicious.
2136        # When we get more date about what's the json response when devserver
2137        # is overloaded, we can update this part.
2138        retryable_error_patterns = [ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE,
2139                                    'is not pingable']
2140        return self._check_error_message(retryable_error_patterns, error_msg)
2141
2142
2143    def _should_use_original_payload(self, error_msg):
2144        devserver_error_patterns = ['DevserverCannotStartError']
2145        return self._check_error_message(devserver_error_patterns, error_msg)
2146
2147
2148    def _parse_buildname_safely(self, build_name):
2149        """Parse a given buildname safely.
2150
2151        @param build_name: the build name to be parsed.
2152
2153        @return: a tuple (board, build_type, milestone)
2154        """
2155        try:
2156            board, build_type, milestone, _ = server_utils.ParseBuildName(
2157                    build_name)
2158        except server_utils.ParseBuildNameException:
2159            logging.warning('Unable to parse build name %s for metrics. '
2160                            'Continuing anyway.', build_name)
2161            board, build_type, milestone = ('', '', '')
2162
2163        return board, build_type, milestone
2164
2165
2166    def _emit_auto_update_metrics(self, board, build_type, dut_host_name,
2167                                  build_name, attempt,
2168                                  success, failure_reason, duration):
2169        """Send metrics for a single auto_update attempt.
2170
2171        @param board: a field in metrics representing which board this
2172            auto_update tries to update.
2173        @param build_type: a field in metrics representing which build type this
2174            auto_update tries to update.
2175        @param dut_host_name: a field in metrics representing which DUT this
2176            auto_update tries to update.
2177        @param build_name: auto update build being updated to.
2178        @param attempt: a field in metrics, representing which attempt/retry
2179            this auto_update is.
2180        @param success: a field in metrics, representing whether this
2181            auto_update succeeds or not.
2182        @param failure_reason: DevServerExceptionClassifier object to show
2183            auto update failure reason, or None.
2184        @param duration: auto update duration time, in seconds.
2185        """
2186        # The following is high cardinality, but sparse.
2187        # Each DUT is of a single board type, and likely build type.
2188        # The affinity also results in each DUT being attached to the same
2189        # dev_server as well.
2190        fields = {
2191                'board': board,
2192                'build_type': build_type,
2193                'dut_host_name': dut_host_name,
2194                'dev_server': self.resolved_hostname,
2195                'attempt': attempt,
2196                'success': success,
2197        }
2198
2199        # reset_after=True is required for String gauges events to ensure that
2200        # the metrics are not repeatedly emitted until the server restarts.
2201
2202        metrics.String(PROVISION_PATH + '/auto_update_build_by_devserver_dut',
2203                       reset_after=True).set(build_name, fields=fields)
2204
2205        if not success:
2206            metrics.String(
2207                PROVISION_PATH +
2208                '/auto_update_failure_reason_by_devserver_dut',
2209                reset_after=True).set(
2210                    failure_reason.classification if failure_reason else '',
2211                    fields=fields)
2212
2213        metrics.SecondsDistribution(
2214                PROVISION_PATH + '/auto_update_duration_by_devserver_dut').add(
2215                        duration, fields=fields)
2216
2217
2218    def _emit_provision_metrics(self, error_list, duration_list,
2219                                is_au_success, board, build_type, milestone,
2220                                dut_host_name, is_aue2etest,
2221                                total_duration, build_name):
2222        """Send metrics for provision request.
2223
2224        Provision represents potentially multiple auto update attempts.
2225
2226        Please note: to avoid reaching or exceeding the monarch field
2227        cardinality limit, we avoid a metric that includes both dut hostname
2228        and other high cardinality fields.
2229
2230        @param error_list: a list of DevServerExceptionClassifier objects to
2231            show errors happened in provision. Usually it contains 1 ~
2232            AU_RETRY_LIMIT objects since we only retry provision for several
2233            times.
2234        @param duration_list: a list of provision duration time, counted by
2235            seconds.
2236        @param is_au_success: a field in metrics, representing whether this
2237            auto_update succeeds or not.
2238        @param board: a field in metrics representing which board this
2239            auto_update tries to update.
2240        @param build_type: a field in metrics representing which build type this
2241            auto_update tries to update.
2242        @param milestone: a field in metrics representing which milestone this
2243            auto_update tries to update.
2244        @param dut_host_name: a field in metrics representing which DUT this
2245            auto_update tries to update.
2246        @param is_aue2etest: a field in metrics representing if provision was
2247            done as part of the autoupdate_EndToEndTest.
2248        """
2249        # The following is high cardinality, but sparse.
2250        # Each DUT is of a single board type, and likely build type.
2251        # The affinity also results in each DUT being attached to the same
2252        # dev_server as well.
2253        fields = {
2254                'board': board,
2255                'build_type': build_type,
2256                'dut_host_name': dut_host_name,
2257                'dev_server': self.resolved_hostname,
2258                'success': is_au_success,
2259        }
2260
2261        # reset_after=True is required for String gauges events to ensure that
2262        # the metrics are not repeatedly emitted until the server restarts.
2263
2264        metrics.String(PROVISION_PATH + '/provision_build_by_devserver_dut',
2265                       reset_after=True).set(build_name, fields=fields)
2266
2267        if error_list:
2268            metrics.String(
2269                    PROVISION_PATH +
2270                    '/provision_failure_reason_by_devserver_dut',
2271                    reset_after=True).set(error_list[0].classification,
2272                                          fields=fields)
2273
2274        metrics.SecondsDistribution(
2275                PROVISION_PATH + '/provision_duration_by_devserver_dut').add(
2276                        total_duration, fields=fields)
2277
2278
2279    def _parse_buildname_from_gs_uri(self, uri):
2280        """Get parameters needed for AU metrics when build_name is not known.
2281
2282        autoupdate_EndToEndTest is run with two Google Storage URIs from the
2283        gs://chromeos-releases bucket. URIs in this bucket do not have the
2284        build_name in the format samus-release/R60-0000.0.0.
2285
2286        We can get the milestone and board by checking the instructions.json
2287        file contained in the bucket with the payloads.
2288
2289        @param uri: The partial uri we received from autoupdate_EndToEndTest.
2290        """
2291        try:
2292            # Get the instructions file that contains info about the build.
2293            gs_file = 'gs://chromeos-releases/' + uri + '/*instructions.json'
2294            files = bin_utils.gs_ls(gs_file)
2295            for f in files:
2296                gs_folder, _, instruction_file = f.rpartition('/')
2297                self.stage_artifacts(image=uri,
2298                                     files=[instruction_file],
2299                                     archive_url=gs_folder)
2300                json_file = self.get_staged_file_url(instruction_file, uri)
2301                response = urllib2.urlopen(json_file)
2302                data = json.load(response)
2303                return data['board'], 'release', data['version']['milestone']
2304        except (ValueError, error.CmdError, urllib2.URLError) as e:
2305            logging.debug('Problem getting values for metrics: %s', e)
2306            logging.warning('Unable to parse build name %s from AU test for '
2307                            'metrics. Continuing anyway.', uri)
2308
2309        return '', '', ''
2310
2311
2312    def auto_update(self, host_name, build_name, original_board=None,
2313                    original_release_version=None, log_dir=None,
2314                    force_update=False, full_update=False,
2315                    payload_filename=None, force_original=False,
2316                    clobber_stateful=True, quick_provision=False):
2317        """Auto-update a CrOS host.
2318
2319        @param host_name: The hostname of the DUT to auto-update.
2320        @param build_name:  The build name to be auto-updated on the DUT.
2321        @param original_board: The original board of the DUT to auto-update.
2322        @param original_release_version: The release version of the DUT's
2323            current build.
2324        @param log_dir: The log directory to store auto-update logs from
2325            devserver.
2326        @param force_update: Force an update even if the version installed
2327                             is the same. Default: False.
2328        @param full_update:  If True, do not run stateful update, directly
2329                             force a full reimage. If False, try stateful
2330                             update first if the dut is already installed
2331                             with the same version.
2332        @param payload_filename: Used to specify the exact file to
2333                                 use for autoupdating. If None, the payload
2334                                 will be determined by build_name. You
2335                                 must have already staged this file before
2336                                 passing it in here.
2337        @param force_original: Whether to force stateful update with the
2338                               original payload.
2339        @param clobber_stateful: If True do a clean install of stateful.
2340        @param quick_provision: Attempt to use quick provision path first.
2341
2342        @return A set (is_success, pid) in which:
2343            1. is_success indicates whether this auto_update succeeds.
2344            2. pid is the process id of the successful autoupdate run.
2345
2346        @raise DevServerException if auto_update fails and is not retryable.
2347        @raise RetryableProvisionException if it fails and is retryable.
2348        """
2349        kwargs = {'host_name': host_name,
2350                  'build_name': build_name,
2351                  'force_update': force_update,
2352                  'full_update': full_update,
2353                  'clobber_stateful': clobber_stateful,
2354                  'quick_provision': quick_provision}
2355
2356        is_aue2etest = payload_filename is not None
2357
2358        if is_aue2etest:
2359            kwargs['payload_filename'] = payload_filename
2360
2361        error_msg = 'CrOS auto-update failed for host %s: %s'
2362        error_msg_attempt = 'Exception raised on auto_update attempt #%s:\n%s'
2363        is_au_success = False
2364        au_log_dir = os.path.join(log_dir,
2365                                  AUTO_UPDATE_LOG_DIR) if log_dir else None
2366        error_list = []
2367        retry_with_another_devserver = False
2368        duration_list = []
2369
2370        if is_aue2etest:
2371            board, build_type, milestone = self._parse_buildname_from_gs_uri(
2372                build_name)
2373        else:
2374            board, build_type, milestone = self._parse_buildname_safely(
2375                build_name)
2376
2377        provision_start_time = time.time()
2378        for au_attempt in range(AU_RETRY_LIMIT):
2379            logging.debug('Start CrOS auto-update for host %s at %d time(s).',
2380                          host_name, au_attempt + 1)
2381            au_start_time = time.time()
2382            failure_reason = None
2383            # No matter _trigger_auto_update succeeds or fails, the auto-update
2384            # track_status_file should be cleaned, and the auto-update execute
2385            # log should be collected to directory sysinfo. Also, the error
2386            # raised by _trigger_auto_update should be displayed.
2387            try:
2388                # Try update with stateful.tgz of old release version in the
2389                # last try of auto-update.
2390                if force_original and original_release_version:
2391                    # Monitor this case in monarch
2392                    original_build = '%s/%s' % (original_board,
2393                                                original_release_version)
2394                    c = metrics.Counter(
2395                            'chromeos/autotest/provision/'
2396                            'cros_update_with_original_build')
2397                    f = {'dev_server': self.resolved_hostname,
2398                         'board': board,
2399                         'build_type': build_type,
2400                         'milestone': milestone,
2401                         'original_build': original_build}
2402                    c.increment(fields=f)
2403
2404                    logging.debug('Try updating stateful partition of the '
2405                                  'host with the same version of its current '
2406                                  'rootfs partition: %s', original_build)
2407                    response = self._trigger_auto_update(
2408                            original_build=original_build, **kwargs)
2409                else:
2410                    response = self._trigger_auto_update(**kwargs)
2411            except DevServerException as e:
2412                logging.debug(error_msg_attempt, au_attempt+1, str(e))
2413                failure_reason = DevServerExceptionClassifier(str(e))
2414            else:
2415                _, raised_error, pid = self.check_for_auto_update_finished(
2416                        response, **kwargs)
2417
2418                # Error happens in _collect_au_log won't be raised.
2419                if au_log_dir:
2420                    is_collect_success = self.collect_au_log(
2421                            kwargs['host_name'], pid, au_log_dir)
2422                else:
2423                    is_collect_success = True
2424
2425                # Error happens in _clean_track_log won't be raised.
2426                if pid >= 0:
2427                    is_clean_success = self.clean_track_log(
2428                            kwargs['host_name'], pid)
2429                else:
2430                    is_clean_success = True
2431
2432                # If any error is raised previously, log it and retry
2433                # auto-update. Otherwise, claim a successful CrOS auto-update.
2434                if (not raised_error and is_clean_success and
2435                    is_collect_success):
2436                    logging.debug('CrOS auto-update succeed for host %s',
2437                                  host_name)
2438                    is_au_success = True
2439                    break
2440                else:
2441                    if not self.kill_au_process_for_host(kwargs['host_name'],
2442                                                         pid):
2443                        logging.debug('Failed to kill auto_update process %d',
2444                                      pid)
2445                    if raised_error:
2446                        error_str = str(raised_error)
2447                        logging.debug(error_msg_attempt, au_attempt + 1,
2448                                      error_str)
2449                        if au_log_dir:
2450                            logging.debug('Please see error details in log %s',
2451                                          self._get_au_log_filename(
2452                                                  au_log_dir,
2453                                                  kwargs['host_name'],
2454                                                  pid))
2455                        failure_reason = DevServerExceptionClassifier(
2456                            error_str, keep_full_trace=False)
2457                        if self._is_retryable(error_str):
2458                            retry_with_another_devserver = True
2459
2460                        if self._should_use_original_payload(error_str):
2461                            force_original = True
2462
2463            finally:
2464                duration = int(time.time() - au_start_time)
2465                duration_list.append(duration)
2466                if failure_reason:
2467                    error_list.append(failure_reason)
2468                self._emit_auto_update_metrics(board, build_type, host_name,
2469                                               build_name, au_attempt + 1,
2470                                               is_au_success, failure_reason,
2471                                               duration)
2472                if retry_with_another_devserver:
2473                    break
2474
2475                if not is_au_success and au_attempt < AU_RETRY_LIMIT - 1:
2476                    time.sleep(CROS_AU_RETRY_INTERVAL)
2477                    # Use the IP of DUT if the hostname failed.
2478                    host_name_ip = socket.gethostbyname(host_name)
2479                    kwargs['host_name'] = host_name_ip
2480                    logging.debug(
2481                            'AU failed, trying IP instead of hostname: %s',
2482                            host_name_ip)
2483
2484        total_duration = int(time.time() - provision_start_time)
2485        self._emit_provision_metrics(error_list, duration_list, is_au_success,
2486                                     board, build_type, milestone, host_name,
2487                                     is_aue2etest, total_duration, build_name)
2488
2489        if is_au_success:
2490            return (is_au_success, pid)
2491
2492        # If errors happen in the CrOS AU process, report the concatenation
2493        # of the errors happening in first & second provision.
2494        # If error happens in RPCs of cleaning track log, collecting
2495        # auto-update logs, or killing auto-update processes, just report a
2496        # common error here.
2497        if error_list:
2498            real_error = ', '.join(['%d) %s' % (i, e.summary)
2499                                    for i, e in enumerate(error_list)])
2500            if retry_with_another_devserver:
2501                raise RetryableProvisionException(
2502                        error_msg % (host_name, real_error))
2503            else:
2504                raise error_list[0].classified_exception(
2505                    error_msg % (host_name, real_error))
2506        else:
2507            raise DevServerException(error_msg % (
2508                        host_name, ('RPC calls after the whole auto-update '
2509                                    'process failed.')))
2510
2511
class AndroidBuildServer(ImageServerBase):
    """Class for DevServer that handles RPCs related to Android builds.

    The calls to devserver to stage artifacts, including stage and download, are
    made in async mode. That is, when caller makes an RPC |stage| to request
    devserver to stage certain artifacts, devserver handles the call and starts
    staging artifacts in a new thread, and return |Success| without waiting for
    staging being completed. When caller receives message |Success|, it polls
    devserver's is_staged call until all artifacts are staged.
    Such mechanism is designed to prevent cherrypy threads in devserver from
    running out, as staging artifacts might take long time, and cherrypy starts
    with a fixed number of threads that handle devserver rpc.
    """

    def wait_for_artifacts_staged(self, target, build_id, branch,
                                  archive_url=None, artifacts='', files=''):
        """Polling devserver.is_staged until all artifacts are staged.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the build.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.

        @return: True if all artifacts are staged in devserver.
        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        # archive_url is only forwarded when the caller supplied one.
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._poll_is_staged(**kwargs)


    @remote_devserver_call()
    def call_and_wait(self, call_name, target, build_id, branch, archive_url,
                      artifacts, files, error_message,
                      expected_response=SUCCESS):
        """Helper method to make a urlopen call, and wait for artifacts staged.

        @param call_name: name of devserver rpc call.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the CrOS build.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.
        @param expected_response: Expected response from rpc, default to
                                  |Success|. If it's set to None, do not compare
                                  the actual response. Any response is
                                  considered to be good.
        @param error_message: Error message to be thrown if response does not
                              match expected_response.

        @return: The response from rpc.
        @raise DevServerException upon any return code that's not
               expected_response.

        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        # archive_url is only forwarded when the caller supplied one.
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._call_and_wait(call_name, error_message, expected_response,
                                   **kwargs)


    @remote_devserver_call()
    def stage_artifacts(self, target=None, build_id=None, branch=None,
                        image=None, artifacts=None, files='', archive_url=None):
        """Tell the devserver to download and stage |artifacts| from |image|.

        This is the main call point for staging any specific artifacts for a
        given build. To see the list of artifacts one can stage see:

        ~src/platform/dev/artifact_info.py.

        This is maintained along with the actual devserver code.

        The build is identified either by |target|, |build_id| and |branch|,
        or by |image|, which is parsed into those three values when none of
        them is given.

        @param target: Target of the android build to stage, e.g.,
                               shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param image: Name of a build to test, in the format of
                      branch/target/build_id
        @param artifacts: A list of artifacts.
        @param files: A list of files to stage.
        @param archive_url: Optional parameter that has the archive_url to stage
                this artifact from. Default is specified in autotest config +
                image.

        @raise DevServerException upon any return code that's not HTTP OK, if
               the build info is incomplete, or if neither artifacts nor
               files are given.
        """
        if image and not target and not build_id and not branch:
            branch, target, build_id = utils.parse_launch_control_build(image)
        if not target or not build_id or not branch:
            raise DevServerException('Must specify all build info (target, '
                                     'build_id and branch) to stage.')

        # All three values are known to be set at this point, so no further
        # validation of the build info is needed.
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        if not artifacts and not files:
            raise DevServerException('Must specify something to stage.')
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        self._stage_artifacts(build, artifacts, files, archive_url,
                              **android_build_info)


    def trigger_download(self, target, build_id, branch, artifacts=None,
                         files='', os='android', synchronous=True):
        """Tell the devserver to download and stage an Android build.

        Tells the devserver to fetch an Android build from the image storage
        server named by _get_image_storage_server().

        If |synchronous| is True, waits for the entire download to finish
        staging before returning. Otherwise only the artifacts necessary
        to start installing images onto DUT's will be staged before returning.
        A caller can then call finish_download to guarantee the rest of the
        artifacts have finished staging.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param artifacts: A string of artifacts separated by comma. If None,
               use the default artifacts for Android or Brillo build.
        @param files: String of files separated by commas.
        @param os: OS artifacts to download (android/brillo).
        @param synchronous: if True, waits until all components of the image are
               staged before returning.

        @raise DevServerException upon any return code that's not HTTP OK.

        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        if not artifacts:
            # The board is the part of the target before the first '-'.
            board = target.split('-')[0]
            artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._trigger_download(build, artifacts, files=files,
                               synchronous=synchronous, **android_build_info)


    def finish_download(self, target, build_id, branch, os='android'):
        """Tell the devserver to finish staging an Android build.

        If trigger_download is called with synchronous=False, it will return
        before all artifacts have been staged. This method contacts the
        devserver and blocks until all staging is completed and should be
        called after a call to trigger_download.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param os: OS artifacts to download (android/brillo).

        @raise DevServerException upon any return code that's not HTTP OK.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        # The board is the part of the target before the first '-'.
        board = target.split('-')[0]
        # Pass |os| through, as trigger_download does, so that both calls
        # refer to the same artifact list for a given build.
        artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._finish_download(build, artifacts, files='', **android_build_info)


    def get_staged_file_url(self, filename, target, build_id, branch):
        """Returns the url of a staged file for this image on the devserver.

        @param filename: Name of the file.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.

        @return: The url of a staged file for this image on the devserver.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch,
                              'os_type': 'android'}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        return '/'.join([self._get_image_url(build), filename])


    @remote_devserver_call()
    def translate(self, build_name):
        """Translate the build name if it's in LATEST format.

        If the build name is in the format [branch]/[target]/LATEST, return the
        latest build in Launch Control otherwise return the build name as is.

        @param build_name: build_name to check.

        @return The actual build name to use.
        """
        branch, target, build_id = utils.parse_launch_control_build(build_name)
        # The LATEST marker is matched case-insensitively.
        if build_id.upper() != 'LATEST':
            return build_name
        call = self.build_call('latestbuild', branch=branch, target=target,
                               os_type='android')
        translated_build_id = self.run_call(call)
        translated_build = (ANDROID_BUILD_NAME_PATTERN %
                            {'branch': branch,
                             'target': target,
                             'build_id': translated_build_id})
        logging.debug('Translated relative build %s to %s', build_name,
                      translated_build)
        return translated_build
2743
2744
def _is_load_healthy(load):
    """Tell whether a devserver's load is within the allowed thresholds.

    The CPU load is checked first, then the network IO. The first value found
    above its threshold is logged and makes the load unhealthy.

    @param load: The devserver's load stats to check.

    @return: True if neither the CPU load nor the network IO exceeds its
             threshold. Return False otherwise.

    """
    cpu_load = load[DevServer.CPU_LOAD]
    if cpu_load > DevServer.MAX_CPU_LOAD:
        logging.debug('CPU load of devserver %s is at %s%%, which is higher '
                      'than the threshold of %s%%', load['devserver'],
                      cpu_load, DevServer.MAX_CPU_LOAD)
        return False

    network_io = load[DevServer.NETWORK_IO]
    if network_io > DevServer.MAX_NETWORK_IO:
        logging.debug('Network IO of devserver %s is at %i Bps, which is '
                      'higher than the threshold of %i bytes per second.',
                      load['devserver'], network_io,
                      DevServer.MAX_NETWORK_IO)
        return False

    return True
2767
2768
def _compare_load(devserver1, devserver2):
    """Comparator ordering two devservers by their disk IO load.

    @param devserver1: A dictionary of devserver load stats to be compared.
    @param devserver2: A dictionary of devserver load stats to be compared.

    @return: The difference between the disk IO of `devserver1` and that of
             `devserver2`, converted to an int: negative if `devserver1` is
             less loaded, positive if it is more loaded, and zero when the
             converted difference is zero.

    """
    disk_io1 = devserver1[DevServer.DISK_IO]
    disk_io2 = devserver2[DevServer.DISK_IO]
    difference = disk_io1 - disk_io2
    return int(difference)
2780
2781
def _get_subnet_for_host_ip(host_ip,
                            restricted_subnets=utils.RESTRICTED_SUBNETS):
    """Find the restricted subnet that a host IP belongs to.

    The subnets are examined in order and the first match wins.

    @param host_ip: the IP of a DUT.
    @param restricted_subnets: A list of restricted subnets, each one a
            (subnet_ip, mask_bits) pair.

    @return: a (subnet_ip, mask_bits) tuple. If no matched subnet for the
             host_ip, return (None, None).
    """
    matching_subnets = (
            (subnet_ip, mask_bits)
            for subnet_ip, mask_bits in restricted_subnets
            if utils.is_in_same_subnet(host_ip, subnet_ip, mask_bits))
    # The generator is lazy, so the search stops at the first match.
    return next(matching_subnets, (None, None))
2797
2798
def get_least_loaded_devserver(devserver_type=ImageServer, hostname=None):
    """Pick the devserver that currently reports the lowest load.

    Load stats are requested from every candidate devserver in parallel, one
    subprocess per devserver. Candidates whose stats could not be retrieved
    or that fail the disk/apache/health checks are dropped, and the remaining
    ones are ordered with `_compare_load`.

    TODO(crbug.com/486278): Devserver with required build already staged should
    take higher priority. This will need check_health call to be able to verify
    existence of a given build/artifact. Also, in case all devservers are
    overloaded, the logic here should fall back to the old behavior that randomly
    selects a devserver based on the hash of the image name/url.

    @param devserver_type: Type of devserver to select from. Default is set to
                           ImageServer.
    @param hostname: Hostname of the dut that the devserver is used for. The
            picked devserver needs to respect the location of the host if
            `prefer_local_devserver` is set to True or `restricted_subnets` is
            set.

    @return: Name of the devserver with the least load, or None when no
             devserver can be selected (no healthy candidate and retry is
             not allowed, no usable load stats, or no candidate with a
             healthy load).

    """
    logging.debug('Get the least loaded %r', devserver_type)
    devservers, can_retry = devserver_type.get_available_devservers(
            hostname)
    # Without a healthy candidate there are two options: give up when a retry
    # is not allowed, or drop the hostname constraint and consider every
    # available devserver.
    healthy_devserver = devserver_type.get_healthy_devserver('', devservers)
    if not healthy_devserver:
        if not can_retry:
            return None
        devservers, _ = devserver_type.get_available_devservers()

    # Each get_devserver_load call runs in its own process so that a timeout
    # can be enforced with a signal.
    result_queue = multiprocessing.Queue()
    workers = [
            multiprocessing.Process(
                    target=devserver_type.get_devserver_load_wrapper,
                    args=(devserver, TIMEOUT_GET_DEVSERVER_LOAD,
                          result_queue))
            for devserver in devservers]

    for worker in workers:
        worker.start()
    for worker in workers:
        # The timeout for the process commands isn't reliable.  Allow some
        # extra time on top of it for potential overhead in the
        # subprocesses.  crbug.com/913695
        worker.join(TIMEOUT_GET_DEVSERVER_LOAD + 10)

    # Drain one result per finished worker *before* terminating the
    # stragglers; killing a process first could corrupt the queue.
    collected = []
    for worker in workers:
        if not worker.is_alive():
            collected.append(result_queue.get())
    for worker in workers:
        if worker.is_alive():
            worker.terminate()

    # Keep only stats that were actually retrieved, support the load check
    # and pass the free-disk and apache-client-count checks.
    usable = [stats for stats in collected
              if stats and DevServer.CPU_LOAD in stats
              and DevServer.is_free_disk_ok(stats)
              and DevServer.is_apache_client_count_ok(stats)]
    if not usable:
        logging.debug('Failed to retrieve load stats from any devserver. No '
                      'load balancing can be applied.')
        return None

    candidates = [stats for stats in usable if _is_load_healthy(stats)]
    if not candidates:
        logging.error('No devserver has the capacity to be selected.')
        return None

    # Lowest load first, as ordered by _compare_load.
    candidates.sort(cmp=_compare_load)
    return candidates[0]['devserver']
2867
2868
def resolve(build, hostname=None, ban_list=None):
    """Resolve a devserver that can be used for the given build and hostname.

    Launch Control builds are resolved through AndroidBuildServer; every
    other build is resolved through ImageServer.

    @param build: Name of a build to stage on devserver, e.g.,
                  ChromeOS build: daisy-release/R50-1234.0.0
                  Launch Control build: git_mnc_release/shamu-eng
    @param hostname: Hostname of the host the devserver is resolved for.
            Default is None, which means the devserver is not restricted by
            the network location of the host.
    @param ban_list: The list of devservers that shouldn't be chosen. It is
            only passed on to ImageServer.resolve.

    @return: A DevServer instance that can be used to stage given build for the
             given host.
    """
    if not utils.is_launch_control_build(build):
        return ImageServer.resolve(build, hostname, ban_list=ban_list)
    return AndroidBuildServer.resolve(build, hostname)
2886