1# Lint as: python2, python3
2"""
3This module defines the PackageManager class which provides an
4implementation of the packaging system API providing methods to fetch,
5upload and remove packages.
6"""
7
8#pylint: disable=missing-docstring
9
10from __future__ import absolute_import
11from __future__ import division
12from __future__ import print_function
13
14import fcntl
15import logging
16import os
17import re
18import shutil
19import six
20
21import common
22from autotest_lib.client.bin import os_dep
23from autotest_lib.client.common_lib import error
24from autotest_lib.client.common_lib import global_config
25from autotest_lib.client.common_lib import utils
26
27
# The name of the checksum file that stores the packages' checksums. It is
# joined onto the package manager directory to locate the local copy, and
# it is also the filename requested from the repositories.
CHECKSUM_FILE = "packages.checksum"
30
31
def has_pbzip2():
    """Report whether parallel bzip2 (pbzip2) can be located on this system.

    @returns True when os_dep.command('pbzip2') succeeds, False when that
            lookup raises ValueError.
    """
    try:
        os_dep.command('pbzip2')
    except ValueError:
        available = False
    else:
        available = True
    return available
39
40
# Is parallel bzip2 available for use? Evaluated once, when this module is
# imported, by calling has_pbzip2().
_PBZIP2_AVAILABLE = has_pbzip2()
43
44
def parse_ssh_path(repo):
    """Split an ssh://xx@xx/path/to/ repository location into its parts.

    @param repo: Repository location string starting with 'ssh://'.

    @returns A (host_line, remote_path) tuple: host_line is the text between
            'ssh://' and the first following '/', remote_path is everything
            from that '/' to the end of the string.

    @raises error.PackageUploadError if repo does not have that form.
    """
    parsed = re.search('^ssh://(.*?)(/.*)$', repo)
    if not parsed:
        raise error.PackageUploadError(
            "Incorrect SSH path in global_config: %s" % repo)
    return parsed.groups()
57
58
def repo_run_command(repo, cmd, ignore_status=False, cd=True):
    """Run a command relative to the repo's path.

    For an 'ssh://' repo the command is wrapped in an ssh invocation against
    the repo's host; for any other repo it is run locally through utils.run.

    @param repo: Repository location; either 'ssh://[user@]host/path' or a
            local path. Surrounding whitespace is stripped.
    @param cmd: The shell command to run.
    @param ignore_status: Passed through to utils.run.
    @param cd: When True, prefix the command with 'cd <repo path> && '.

    @returns Whatever utils.run returns, or None when the assembled command
            line is empty (empty cmd on a local repo with cd=False).

    @raises error.PackageUploadError if an 'ssh://' repo cannot be parsed.
    """
    repo = repo.strip()
    if repo.startswith('ssh://'):
        hostline, remote_path = parse_ssh_path(repo)
        cd_str = 'cd %s && ' % remote_path if cd else ''
        # hostline is either 'user@host' or a bare 'host'; both are valid ssh
        # destinations as-is. The previous code referenced an unbound 'host'
        # variable whenever no user was given.
        run_cmd = 'ssh %s "%s%s"' % (hostline, cd_str, cmd)
    else:
        cd_str = 'cd %s && ' % repo if cd else ''
        run_cmd = "%s%s" % (cd_str, cmd)

    if run_cmd:
        return utils.run(run_cmd, ignore_status=ignore_status)
82
83
def create_directory(repo):
    """Create the repository directory, including missing parents.

    Runs 'mkdir -p' through repo_run_command without changing into the repo
    directory first. For an 'ssh://' repo only the remote path component is
    passed to mkdir.

    @param repo: Repository location (local path or 'ssh://' location).
    """
    if repo.startswith('ssh://'):
        target_dir = parse_ssh_path(repo)[1]
    else:
        target_dir = repo
    repo_run_command(repo, 'mkdir -p %s' % target_dir, cd=False)
89
90
def check_diskspace(repo, min_free=None):
    """Verify that the repo has at least min_free GB of free disk space.

    @param repo: Repository location to run 'df' in.
    @param min_free: Required free space in GB. When None, the value is read
            from the 'minimum_free_space' key of the PACKAGES config section
            (default 1).

    @raises error.RepoUnknownError if running or parsing 'df' fails.
    @raises error.RepoDiskFullError if the free space is below min_free.
    """
    # Note: 1 GB = 10**9 bytes (SI unit).
    if min_free is None:
        config = global_config.global_config
        min_free = config.get_config_value('PACKAGES', 'minimum_free_space',
                                           type=int, default=1)
    df_cmd = 'df -PB %d . | tail -1' % 10 ** 9
    try:
        df_fields = repo_run_command(repo, df_cmd).stdout.split()
        # Fourth whitespace-separated field of the last line of df output.
        free_space_gb = int(df_fields[3])
    except Exception as e:
        raise error.RepoUnknownError('Unknown Repo Error: %s' % e)
    if free_space_gb < min_free:
        raise error.RepoDiskFullError('Not enough disk space available '
                                      '%sg < %sg' % (free_space_gb, min_free))
106
107
def check_write(repo):
    """Verify that the repo is writable by creating and removing a probe file.

    @param repo: Repository location to test.

    @raises error.RepoWriteError if either command fails with CmdError.
    """
    probe_name = '.repo_test_file'
    try:
        repo_run_command(repo, 'touch %s' % probe_name).stdout.strip()
        repo_run_command(repo, 'rm %s' % probe_name)
    except error.CmdError:
        raise error.RepoWriteError('Unable to write to ' + repo)
115
116
def trim_custom_directories(repo, older_than_days=None):
    """Delete files in the repo that have not been accessed recently.

    Runs 'find . -type f -atime +N -exec rm -f {} \\;' inside the repo. The
    command's exit status is ignored, so this is a best-effort cleanup.

    @param repo: Repository location; nothing is done when it is empty/None.
    @param older_than_days: Access-age threshold in days. When None, the
            value is read from the 'custom_max_age' key of the PACKAGES
            config section (default 40).
    """
    if not repo:
        return

    if older_than_days is None:
        older_than_days = global_config.global_config.get_config_value(
            'PACKAGES', 'custom_max_age', type=int, default=40)
    # Raw string: '\;' is not a valid Python escape sequence, and the
    # backslash must reach the shell so that ';' terminates -exec.
    cmd = r'find . -type f -atime +%s -exec rm -f {} \;' % older_than_days
    repo_run_command(repo, cmd, ignore_status=True)
126
127
class RepositoryFetcher(object):
    """Base class for objects that fetch package files from one repository."""

    # Location of the repository served by this fetcher; set by subclasses.
    url = None


    def fetch_pkg_file(self, filename, dest_path):
        """Fetch a single package file from the repository.

        Subclasses must override this; the base implementation always raises
        NotImplementedError.

        @param filename: The filename of the package file to fetch.
        @param dest_path: Destination path to download the file to.

        @raises PackageFetchError if the fetch failed
        """
        raise NotImplementedError
141
142
class HttpFetcher(RepositoryFetcher):
    """Fetches package files from an http repository by running curl."""

    # Filled in with (source url, local destination path).
    curl_cmd_pattern = 'curl --connect-timeout 15 -s %s -o %s'


    def __init__(self, package_manager, repository_url):
        """
        @param package_manager: Object whose _run_command callable is used
                to execute every command issued by this fetcher.
        @param repository_url: The base URL of the http repository
        """
        self.run_command = package_manager._run_command
        self.url = repository_url

    def exists(self, destpath, target='file'):
        """Check if a file or directory exists using `test`.

        This is a wrapper for run_command.

        Args:
          destpath: Path to check.
          target: Optional string that should either be 'file' or 'dir'
                  indicating what should exist. 'dir' uses `test -d`; any
                  other value uses `test -e`.

        Returns:
          True if the test command ran without raising CmdError or
          AutoservRunError, False otherwise.
        """
        test_flag = '-d' if target == 'dir' else '-e'
        try:
            self.run_command('test %s %s' % (test_flag, destpath))
        except (error.CmdError, error.AutoservRunError):
            return False
        return True

    def _quick_http_test(self):
        """ Run a short curl (30 second timeout) against the repository URL
        to see if it is reachable, so that an unreachable repository is
        detected without waiting for the much longer package fetch timeout.

        @raises PackageFetchError if the test fetch fails for any reason.
        """
        # Only a name is needed here (mktemp -u does not create the file);
        # curl writes the test fetch into it.
        probe_path = self.run_command(
                'mktemp -u /tmp/tmp.XXXXXX').stdout.strip()

        try:
            probe_cmd = self.curl_cmd_pattern % (self.url, probe_path)
            try:
                self.run_command(probe_cmd,
                                 _run_command_dargs={'timeout': 30})
            except Exception as e:
                msg = 'HTTP test failed, unable to contact %s: %s'
                raise error.PackageFetchError(msg % (self.url, e))
        finally:
            # Always discard whatever the probe downloaded.
            self.run_command('rm -rf %s' % probe_path)


    def fetch_pkg_file(self, filename, dest_path):
        """Download filename from the repository to dest_path with curl.

        @param filename: The filename of the package file to fetch.
        @param dest_path: Destination path to download the file to.

        @raises PackageFetchError if the repository is unreachable, or if
                curl fails or leaves no file at dest_path.
        """
        logging.info('Fetching %s from %s to %s', filename, self.url,
                     dest_path)

        # Fail fast when the repository cannot be contacted at all.
        self._quick_http_test()

        package_url = os.path.join(self.url, filename)
        try:
            curl_cmd = self.curl_cmd_pattern % (package_url, dest_path)
            curl_result = self.run_command(
                    curl_cmd, _run_command_dargs={'timeout': 1200})

            # The command returning is not enough; the file must be there.
            if not self.exists(dest_path):
                logging.error('curl failed: %s', curl_result)
                raise error.CmdError(curl_cmd, curl_result)

            logging.info('Successfully fetched %s from %s', filename,
                         package_url)
        except error.CmdError as e:
            # remove whatever junk was retrieved when the get failed
            self.run_command('rm -f %s' % dest_path)

            failure = e.result_obj
            raise error.PackageFetchError(
                    '%s not found in %s\n%scurl error code: %d'
                    % (filename, package_url, failure.stderr,
                       failure.exit_status))
221
222
class LocalFilesystemFetcher(RepositoryFetcher):
    """Fetches package files from a directory by running `cp`."""

    def __init__(self, package_manager, local_dir):
        """
        @param package_manager: Object whose _run_command callable is used
                to execute the copy command.
        @param local_dir: Directory that holds the package files.
        """
        self.run_command = package_manager._run_command
        self.url = local_dir


    def fetch_pkg_file(self, filename, dest_path):
        """Copy filename from the repository directory to dest_path.

        @param filename: The filename of the package file to fetch.
        @param dest_path: Destination path to copy the file to.

        @raises PackageFetchError if the copy command raises CmdError.
        """
        logging.info('Fetching %s from %s to %s', filename, self.url,
                     dest_path)
        source_path = os.path.join(self.url, filename)
        copy_cmd = 'cp %s %s' % (source_path, dest_path)
        try:
            self.run_command(copy_cmd)
        except error.CmdError as e:
            raise error.PackageFetchError(
                'Package %s could not be fetched from %s'
                % (filename, self.url), e)
        else:
            logging.debug('Successfully fetched %s from %s', filename,
                          source_path)
241
242
243class BasePackageManager(object):
244    def __init__(self, pkgmgr_dir, hostname=None, repo_urls=None,
245                 upload_paths=None, do_locking=True, run_function=utils.run,
246                 run_function_args=[], run_function_dargs={}):
247        '''
248        repo_urls: The list of the repository urls which is consulted
249                   whilst fetching the package
250        upload_paths: The list of the upload of repositories to which
251                      the package is uploaded to
252        pkgmgr_dir : A directory that can be used by the package manager
253                      to dump stuff (like checksum files of the repositories
254                      etc.).
255        do_locking : Enable locking when the packages are installed.
256
257        run_function is used to execute the commands throughout this file.
258        It defaults to utils.run() but a custom method (if provided) should
259        be of the same schema as utils.run. It should return a CmdResult
260        object and throw a CmdError exception. The reason for using a separate
261        function to run the commands is that the same code can be run to fetch
262        a package on the local machine or on a remote machine (in which case
263        ssh_host's run function is passed in for run_function).
264        '''
265        # In memory dictionary that stores the checksum's of packages
266        self._checksum_dict = {}
267
268        self.pkgmgr_dir = pkgmgr_dir
269        self.do_locking = do_locking
270        self.hostname = hostname
271        self.repositories = []
272
273        # Create an internal function that is a simple wrapper of
274        # run_function and takes in the args and dargs as arguments
275        def _run_command(command, _run_command_args=run_function_args,
276                         _run_command_dargs={}):
277            '''
278            Special internal function that takes in a command as
279            argument and passes it on to run_function (if specified).
280            The _run_command_dargs are merged into run_function_dargs
281            with the former having more precedence than the latter.
282            '''
283            new_dargs = dict(run_function_dargs)
284            new_dargs.update(_run_command_dargs)
285            # avoid polluting logs with extremely verbose packaging output
286            new_dargs.update({'stdout_tee' : None})
287
288            return run_function(command, *_run_command_args,
289                                **new_dargs)
290
291        self._run_command = _run_command
292
293        # Process the repository URLs
294        if not repo_urls:
295            repo_urls = []
296        elif hostname:
297            repo_urls = self.get_mirror_list(repo_urls)
298        for url in repo_urls:
299            self.add_repository(url)
300
301        # Process the upload URLs
302        if not upload_paths:
303            self.upload_paths = []
304        else:
305            self.upload_paths = list(upload_paths)
306
307
308    def add_repository(self, repo):
309        if isinstance(repo, six.string_types):
310            self.repositories.append(self.get_fetcher(repo))
311        elif isinstance(repo, RepositoryFetcher):
312            self.repositories.append(repo)
313        else:
314            raise TypeError("repo must be RepositoryFetcher or url string")
315
316    def exists(self, destpath, target='file'):
317        """Check if a file or directory exists using `test`.
318
319        This is a wrapper for _run_command.
320
321        Args:
322          target: Optional string that should either be 'file' or 'dir'
323                  indicating what should exist.
324        """
325        if target == 'dir':
326            test_cmd = 'test -d %s'
327        else:
328            test_cmd = 'test -e %s'
329
330        try:
331            self._run_command(test_cmd % destpath)
332            return True
333        except (error.CmdError, error.AutoservRunError):
334            return False
335
336    def get_fetcher(self, url):
337        if url.startswith('http://'):
338            return HttpFetcher(self, url)
339        else:
340            return LocalFilesystemFetcher(self, url)
341
342
343    def repo_check(self, repo):
344        '''
345        Check to make sure the repo is in a sane state:
346        ensure we have at least XX amount of free space
347        Make sure we can write to the repo
348        '''
349        if not repo.startswith('/') and not repo.startswith('ssh:'):
350            return
351        try:
352            create_directory(repo)
353            check_diskspace(repo)
354            check_write(repo)
355        except (error.RepoWriteError, error.RepoUnknownError,
356                error.RepoDiskFullError) as e:
357            raise error.RepoError("ERROR: Repo %s: %s" % (repo, e))
358
359
360    def upkeep(self, custom_repos=None):
361        '''
362        Clean up custom upload/download areas
363        '''
364        from autotest_lib.server import subcommand
365        if not custom_repos:
366            # Not all package types necessarily require or allow custom repos
367            try:
368                custom_repos = global_config.global_config.get_config_value(
369                    'PACKAGES', 'custom_upload_location').split(',')
370            except global_config.ConfigError:
371                custom_repos = []
372            try:
373                custom_download = global_config.global_config.get_config_value(
374                    'PACKAGES', 'custom_download_location')
375                custom_repos += [custom_download]
376            except global_config.ConfigError:
377                pass
378
379            if not custom_repos:
380                return
381
382        subcommand.parallel_simple(trim_custom_directories, custom_repos,
383                                   log=False)
384
385
386    def install_pkg(self, name, pkg_type, fetch_dir, install_dir,
387                    preserve_install_dir=False, repo_url=None):
388        '''
389        Remove install_dir if it already exists and then recreate it unless
390        preserve_install_dir is specified as True.
391        Fetch the package into the pkg_dir. Untar the package into install_dir
392        The assumption is that packages are of the form :
393        <pkg_type>.<pkg_name>.tar.bz2
394        name        : name of the package
395        type        : type of the package
396        fetch_dir   : The directory into which the package tarball will be
397                      fetched to.
398        install_dir : the directory where the package files will be untarred to
399        repo_url    : the url of the repository to fetch the package from.
400        '''
401
402        # do_locking flag is on by default unless you disable it (typically
403        # in the cases where packages are directly installed from the server
404        # onto the client in which case fcntl stuff wont work as the code
405        # will run on the server in that case..
406        if self.do_locking:
407            lockfile_name = '.%s-%s-lock' % (name, pkg_type)
408            lockfile = open(os.path.join(self.pkgmgr_dir, lockfile_name), 'w')
409
410        try:
411            if self.do_locking:
412                fcntl.flock(lockfile, fcntl.LOCK_EX)
413
414            self._run_command('mkdir -p %s' % fetch_dir)
415
416            pkg_name = self.get_tarball_name(name, pkg_type)
417            fetch_path = os.path.join(fetch_dir, pkg_name)
418            try:
419                # Fetch the package into fetch_dir
420                self.fetch_pkg(pkg_name, fetch_path, use_checksum=True)
421
422                # check to see if the install_dir exists and if it does
423                # then check to see if the .checksum file is the latest
424                if (self.exists(install_dir, target='dir') and
425                    not self.untar_required(fetch_path, install_dir)):
426                    return
427
428                # untar the package into install_dir and
429                # update the checksum in that directory
430                if not preserve_install_dir:
431                    # Make sure we clean up the install_dir
432                    self._run_command('rm -rf %s' % install_dir)
433                self._run_command('mkdir -p %s' % install_dir)
434
435                self.untar_pkg(fetch_path, install_dir)
436
437            except error.PackageFetchError as why:
438                raise error.PackageInstallError(
439                    'Installation of %s(type:%s) failed : %s'
440                    % (name, pkg_type, why))
441        finally:
442            if self.do_locking:
443                fcntl.flock(lockfile, fcntl.LOCK_UN)
444                lockfile.close()
445
446
447    def fetch_pkg(self, pkg_name, dest_path, repo_url=None, use_checksum=False):
448        '''
449        Fetch the package into dest_dir from repo_url. By default repo_url
450        is None and the package is looked in all the repositories specified.
451        Otherwise it fetches it from the specific repo_url.
452        pkg_name     : name of the package (ex: test-sleeptest.tar.bz2,
453                                            dep-gcc.tar.bz2, kernel.1-1.rpm)
454        repo_url     : the URL of the repository where the package is located.
455        dest_path    : complete path of where the package will be fetched to.
456        use_checksum : This is set to False to fetch the packages.checksum file
457                       so that the checksum comparison is bypassed for the
458                       checksum file itself. This is used internally by the
459                       packaging system. It should be ignored by externals
460                       callers of this method who use it fetch custom packages.
461        '''
462        # Check if the destination dir exists.
463        if not self.exists(os.path.dirname(dest_path), target='dir'):
464            raise error.PackageFetchError("Please provide a valid "
465                                          "destination: %s " % dest_path)
466
467        # See if the package was already fetched earlier, if so
468        # the checksums need to be compared and the package is now
469        # fetched only if they differ.
470        pkg_exists = self.exists(dest_path)
471
472        # if a repository location is explicitly provided, fetch the package
473        # from there and return
474        if repo_url:
475            repositories = [self.get_fetcher(repo_url)]
476        elif self.repositories:
477            repositories = self.repositories
478        else:
479            raise error.PackageFetchError("No repository urls specified")
480
481        # install the package from the package repos, try the repos in
482        # reverse order, assuming that the 'newest' repos are most desirable
483        for fetcher in reversed(repositories):
484            try:
485                # Fetch the package if it is not there, the checksum does
486                # not match, or checksums are disabled entirely
487                need_to_fetch = (
488                        not use_checksum or not pkg_exists
489                        or not self.compare_checksum(dest_path))
490                if need_to_fetch:
491                    fetcher.fetch_pkg_file(pkg_name, dest_path)
492                    # update checksum so we won't refetch next time.
493                    if use_checksum:
494                        self.update_checksum(dest_path)
495                return
496            except (error.PackageFetchError, error.AutoservRunError) as e:
497                # The package could not be found in this repo, continue looking
498                logging.debug(e)
499
500        repo_url_list = [repo.url for repo in repositories]
501        message = ('%s could not be fetched from any of the repos %s' %
502                   (pkg_name, repo_url_list))
503        logging.debug(message)
504        # if we got here then that means the package is not found
505        # in any of the repositories.
506        raise error.PackageFetchError(message)
507
508
509    def upload_pkg(self, pkg_path, upload_path=None, update_checksum=False,
510                   timeout=300):
511        from autotest_lib.server import subcommand
512        if upload_path:
513            upload_path_list = [upload_path]
514            self.upkeep(upload_path_list)
515        elif len(self.upload_paths) > 0:
516            self.upkeep()
517            upload_path_list = self.upload_paths
518        else:
519            raise error.PackageUploadError("Invalid Upload Path specified")
520
521        if update_checksum:
522            # get the packages' checksum file and update it with the current
523            # package's checksum
524            self.update_checksum(pkg_path)
525
526        commands = []
527        for path in upload_path_list:
528            commands.append(subcommand.subcommand(self.upload_pkg_parallel,
529                                                  (pkg_path, path,
530                                                   update_checksum)))
531
532        results = subcommand.parallel(commands, timeout, return_results=True)
533        for result in results:
534            if result:
535                print(str(result))
536
537
538    # TODO(aganti): Fix the bug with the current checksum logic where
539    # packages' checksums that are not present consistently in all the
540    # repositories are not handled properly. This is a corner case though
541    # but the ideal solution is to make the checksum file repository specific
542    # and then maintain it.
543    def upload_pkg_parallel(self, pkg_path, upload_path, update_checksum=False):
544        '''
545        Uploads to a specified upload_path or to all the repos.
546        Also uploads the checksum file to all the repos.
547        pkg_path        : The complete path to the package file
548        upload_path     : the absolute path where the files are copied to.
549                          if set to 'None' assumes 'all' repos
550        update_checksum : If set to False, the checksum file is not
551                          going to be updated which happens by default.
552                          This is necessary for custom
553                          packages (like custom kernels and custom tests)
554                          that get uploaded which do not need to be part of
555                          the checksum file and bloat it.
556        '''
557        self.repo_check(upload_path)
558        # upload the package
559        if os.path.isdir(pkg_path):
560            self.upload_pkg_dir(pkg_path, upload_path)
561        else:
562            self.upload_pkg_file(pkg_path, upload_path)
563            if update_checksum:
564                self.upload_pkg_file(self._get_checksum_file_path(),
565                                     upload_path)
566
567
568    def upload_pkg_file(self, file_path, upload_path):
569        '''
570        Upload a single file. Depending on the upload path, the appropriate
571        method for that protocol is called. Currently this simply copies the
572        file to the target directory (but can be extended for other protocols)
573        This assumes that the web server is running on the same machine where
574        the method is being called from. The upload_path's files are
575        basically served by that web server.
576        '''
577        try:
578            if upload_path.startswith('ssh://'):
579                # parse ssh://user@host/usr/local/autotest/packages
580                hostline, remote_path = parse_ssh_path(upload_path)
581                try:
582                    utils.run('scp %s %s:%s' % (file_path, hostline,
583                                                remote_path))
584                    r_path = os.path.join(remote_path,
585                                          os.path.basename(file_path))
586                    utils.run("ssh %s 'chmod 644 %s'" % (hostline, r_path))
587                except error.CmdError:
588                    logging.error("Error uploading to repository %s",
589                                  upload_path)
590            else:
591                # Delete any older version of the package that might exist.
592                orig_file = os.path.join(upload_path,
593                                         os.path.basename(file_path))
594                if os.path.exists(orig_file):
595                    os.remove(orig_file)
596
597                shutil.copy(file_path, upload_path)
598                os.chmod(orig_file, 0o644)
599        except (IOError, os.error) as why:
600            logging.error("Upload of %s to %s failed: %s", file_path,
601                          upload_path, why)
602
603
604    def upload_pkg_dir(self, dir_path, upload_path):
605        '''
606        Upload a full directory. Depending on the upload path, the appropriate
607        method for that protocol is called. Currently this copies the whole
608        tmp package directory to the target directory.
609        This assumes that the web server is running on the same machine where
610        the method is being called from. The upload_path's files are
611        basically served by that web server.
612        '''
613        local_path = os.path.join(dir_path, "*")
614        try:
615            if upload_path.startswith('ssh://'):
616                hostline, remote_path = parse_ssh_path(upload_path)
617                try:
618                    utils.run('scp %s %s:%s' % (local_path, hostline,
619                                                remote_path))
620                    ssh_path = os.path.join(remote_path, "*")
621                    utils.run("ssh %s 'chmod 644 %s'" % (hostline, ssh_path))
622                except error.CmdError:
623                    logging.error("Error uploading to repository: %s",
624                                  upload_path)
625            else:
626                utils.run("cp %s %s " % (local_path, upload_path))
627                up_path = os.path.join(upload_path, "*")
628                utils.run("chmod 644 %s" % up_path)
629        except (IOError, os.error) as why:
630            raise error.PackageUploadError("Upload of %s to %s failed: %s"
631                                           % (dir_path, upload_path, why))
632
633
634    def remove_pkg(self, pkg_name, remove_path=None, remove_checksum=False):
635        '''
636        Remove the package from the specified remove_path
637        pkg_name    : name of the package (ex: test-sleeptest.tar.bz2,
638                                           dep-gcc.tar.bz2)
639        remove_path : the location to remove the package from.
640
641        '''
642        if remove_path:
643            remove_path_list = [remove_path]
644        elif len(self.upload_paths) > 0:
645            remove_path_list = self.upload_paths
646        else:
647            raise error.PackageRemoveError(
648                "Invalid path to remove the pkg from")
649
650        checksum_path = self._get_checksum_file_path()
651
652        if remove_checksum:
653            self.remove_checksum(pkg_name)
654
655        # remove the package and upload the checksum file to the repos
656        for path in remove_path_list:
657            self.remove_pkg_file(pkg_name, path)
658            self.upload_pkg_file(checksum_path, path)
659
660
661    def remove_pkg_file(self, filename, pkg_dir):
662        '''
663        Remove the file named filename from pkg_dir
664        '''
665        try:
666            # Remove the file
667            if pkg_dir.startswith('ssh://'):
668                hostline, remote_path = parse_ssh_path(pkg_dir)
669                path = os.path.join(remote_path, filename)
670                utils.run("ssh %s 'rm -rf %s/%s'" % (hostline, remote_path,
671                          path))
672            else:
673                os.remove(os.path.join(pkg_dir, filename))
674        except (IOError, os.error) as why:
675            raise error.PackageRemoveError("Could not remove %s from %s: %s "
676                                           % (filename, pkg_dir, why))
677
678
679    def get_mirror_list(self, repo_urls):
680        '''
681            Stub function for site specific mirrors.
682
683            Returns:
684                Priority ordered list
685        '''
686        return repo_urls
687
688
689    def _get_checksum_file_path(self):
690        '''
691        Return the complete path of the checksum file (assumed to be stored
692        in self.pkgmgr_dir
693        '''
694        return os.path.join(self.pkgmgr_dir, CHECKSUM_FILE)
695
696
697    def _get_checksum_dict(self):
698        '''
699        Fetch the checksum file if not already fetched. If the checksum file
700        cannot be fetched from the repos then a new file is created with
701        the current package's (specified in pkg_path) checksum value in it.
702        Populate the local checksum dictionary with the values read from
703        the checksum file.
704        The checksum file is assumed to be present in self.pkgmgr_dir
705        '''
706        checksum_path = self._get_checksum_file_path()
707        if not self._checksum_dict:
708            # Fetch the checksum file
709            try:
710                if not self.exists(checksum_path):
711                    # The packages checksum file does not exist locally.
712                    # See if it is present in the repositories.
713                    self.fetch_pkg(CHECKSUM_FILE, checksum_path)
714            except error.PackageFetchError:
715                # This should not happen whilst fetching a package..if a
716                # package is present in the repository, the corresponding
717                # checksum file should also be automatically present. This
718                # case happens only when a package
719                # is being uploaded and if it is the first package to be
720                # uploaded to the repos (hence no checksum file created yet)
721                # Return an empty dictionary in that case
722                return {}
723
724            # Read the checksum file into memory
725            checksum_file_contents = self._run_command('cat '
726                                                       + checksum_path).stdout
727
728            # Return {} if we have an empty checksum file present
729            if not checksum_file_contents.strip():
730                return {}
731
732            # Parse the checksum file contents into self._checksum_dict
733            for line in checksum_file_contents.splitlines():
734                checksum, package_name = line.split(None, 1)
735                self._checksum_dict[package_name] = checksum
736
737        return self._checksum_dict
738
739
740    def _save_checksum_dict(self, checksum_dict):
741        '''
742        Save the checksum dictionary onto the checksum file. Update the
743        local _checksum_dict variable with this new set of values.
744        checksum_dict :  New checksum dictionary
745        checksum_dir  :  The directory in which to store the checksum file to.
746        '''
747        checksum_path = self._get_checksum_file_path()
748        self._checksum_dict = checksum_dict.copy()
749        checksum_contents = '\n'.join(checksum + ' ' + pkg_name
750                                      for pkg_name, checksum in
751                                      six.iteritems(checksum_dict))
752        # Write the checksum file back to disk
753        self._run_command('echo "%s" > %s' % (checksum_contents,
754                                              checksum_path),
755                          _run_command_dargs={'verbose': False})
756
757
758    def compute_checksum(self, pkg_path):
759        '''
760        Compute the MD5 checksum for the package file and return it.
761        pkg_path : The complete path for the package file
762        '''
763        # Check if the checksum has been pre-calculated.
764        # There are two modes of operation:
765        #
766        # 1. Package is compiled on dev machine / build server : In this
767        # case, we will have the freshest checksum during the install
768        # phase (which was computed and stored during src_compile). The
769        # checksum always gets recomputed during src_compile.
770        #
771        # 2. Package in installed from a fetched prebuilt: Here, we will
772        # have the checksum associated with what was used to compile
773        # the prebuilt. So it is expected to be the same.
774        checksum_path = pkg_path + '.checksum'
775        if os.path.exists(checksum_path):
776            print("Checksum %s exists" % checksum_path)
777            with open(checksum_path, "r") as f:
778                return f.read().replace('\n', '')
779        md5sum_output = self._run_command("md5sum %s " % pkg_path).stdout
780        return md5sum_output.split()[0]
781
782
783    def update_checksum(self, pkg_path):
784        '''
785        Update the checksum of the package in the packages' checksum
786        file. This method is called whenever a package is fetched just
787        to be sure that the checksums in the local file are the latest.
788        pkg_path : The complete path to the package file.
789        '''
790        # Compute the new checksum
791        new_checksum = self.compute_checksum(pkg_path)
792        checksum_dict = self._get_checksum_dict()
793        checksum_dict[os.path.basename(pkg_path)] = new_checksum
794        self._save_checksum_dict(checksum_dict)
795
796
797    def remove_checksum(self, pkg_name):
798        '''
799        Remove the checksum of the package from the packages checksum file.
800        This method is called whenever a package is removed from the
801        repositories in order clean its corresponding checksum.
802        pkg_name :  The name of the package to be removed
803        '''
804        checksum_dict = self._get_checksum_dict()
805        if pkg_name in checksum_dict:
806            del checksum_dict[pkg_name]
807        self._save_checksum_dict(checksum_dict)
808
809
810    def compare_checksum(self, pkg_path):
811        '''
812        Calculate the checksum of the file specified in pkg_path and
813        compare it with the checksum in the checksum file
814        Return True if both match else return False.
815        pkg_path : The full path to the package file for which the
816                   checksum is being compared
817        '''
818        checksum_dict = self._get_checksum_dict()
819        package_name = os.path.basename(pkg_path)
820        if not checksum_dict or package_name not in checksum_dict:
821            return False
822
823        repository_checksum = checksum_dict[package_name]
824        local_checksum = self.compute_checksum(pkg_path)
825        return (local_checksum == repository_checksum)
826
827
828    def tar_package(self, pkg_name, src_dir, dest_dir, exclude_string=None):
829        '''
830        Create a tar.bz2 file with the name 'pkg_name' say test-blah.tar.bz2.
831        Excludes the directories specified in exclude_string while tarring
832        the source. Returns the tarball path.
833        '''
834        tarball_path = os.path.join(dest_dir, pkg_name)
835        temp_path = tarball_path + '.tmp'
836        cmd_list = ['tar', '-cf', temp_path, '-C', src_dir]
837        if _PBZIP2_AVAILABLE:
838            cmd_list.append('--use-compress-prog=pbzip2')
839        else:
840            cmd_list.append('-j')
841        if exclude_string is not None:
842            cmd_list.append(exclude_string)
843
844        try:
845            utils.system(' '.join(cmd_list))
846        except:
847            os.unlink(temp_path)
848            raise
849
850        os.rename(temp_path, tarball_path)
851        return tarball_path
852
853
854    def untar_required(self, tarball_path, dest_dir):
855        '''
856        Compare the checksum of the tarball_path with the .checksum file
857        in the dest_dir and return False if it matches. The untar
858        of the package happens only if the checksums do not match.
859        '''
860        checksum_path = os.path.join(dest_dir, '.checksum')
861        try:
862            existing_checksum = self._run_command('cat ' + checksum_path).stdout
863        except (error.CmdError, error.AutoservRunError):
864            # If the .checksum file is not present (generally, this should
865            # not be the case) then return True so that the untar happens
866            return True
867
868        new_checksum = self.compute_checksum(tarball_path)
869        return (new_checksum.strip() != existing_checksum.strip())
870
871
872    def untar_pkg(self, tarball_path, dest_dir):
873        '''
874        Untar the package present in the tarball_path and put a
875        ".checksum" file in the dest_dir containing the checksum
876        of the tarball. This method
877        assumes that the package to be untarred is of the form
878        <name>.tar.bz2
879        '''
880        self._run_command('tar --no-same-owner -xjf %s -C %s' %
881                          (tarball_path, dest_dir))
882        # Put the .checksum file in the install_dir to note
883        # where the package came from
884        pkg_checksum = self.compute_checksum(tarball_path)
885        pkg_checksum_path = os.path.join(dest_dir,
886                                         '.checksum')
887        self._run_command('echo "%s" > %s '
888                          % (pkg_checksum, pkg_checksum_path))
889
890
891    @staticmethod
892    def get_tarball_name(name, pkg_type):
893        """Converts a package name and type into a tarball name.
894
895        @param name: The name of the package
896        @param pkg_type: The type of the package
897
898        @returns A tarball filename for that specific type of package
899        """
900        assert '-' not in pkg_type
901        return '%s-%s.tar.bz2' % (pkg_type, name)
902
903
904    @staticmethod
905    def parse_tarball_name(tarball_name):
906        """Coverts a package tarball name into a package name and type.
907
908        @param tarball_name: The filename of the tarball
909
910        @returns (name, pkg_type) where name is the package name and pkg_type
911            is the package type.
912        """
913        match = re.search(r'^([^-]*)-(.*)\.tar\.bz2$', tarball_name)
914        pkg_type, name = match.groups()
915        return name, pkg_type
916
917
918    def is_url(self, url):
919        """Return true if path looks like a URL"""
920        return url.startswith('http://')
921
922
923    def get_package_name(self, url, pkg_type):
924        '''
925        Extract the group and test name for the url. This method is currently
926        used only for tests.
927        '''
928        if pkg_type == 'test':
929            regex = '[^:]+://(.*)/([^/]*)$'
930            return self._get_package_name(url, regex)
931        else:
932            return ('', url)
933
934
935    def _get_package_name(self, url, regex):
936        if not self.is_url(url):
937            if url.endswith('.tar.bz2'):
938                testname = url.replace('.tar.bz2', '')
939                testname = re.sub(r'(\d*)\.', '', testname)
940                return (testname, testname)
941            else:
942                return ('', url)
943
944        match = re.match(regex, url)
945        if not match:
946            return ('', url)
947        group, filename = match.groups()
948        # Generate the group prefix.
949        group = re.sub(r'\W', '_', group)
950        # Drop the extension to get the raw test name.
951        testname = re.sub(r'\.tar\.bz2', '', filename)
952        # Drop any random numbers at the end of the test name if any
953        testname = re.sub(r'\.(\d*)', '', testname)
954        return (group, testname)
955
956
class SiteHttpFetcher(HttpFetcher):
    """HttpFetcher with its own curl command line and no quick HTTP test."""

    # Command template with two %s placeholders: the argument given to
    # curl's -s option position (the source) and the -o output path.
    curl_cmd_pattern = ('curl --connect-timeout 15 '
                        '--retry 5 --retry-delay 5 '
                        '--fail -s %s -o %s')

    def _quick_http_test(self):
        """Do nothing.

        The quick http test is shortcut for now since our dev server does
        not support this operation.
        """
        return None
965
966
class PackageManager(BasePackageManager):
    """BasePackageManager that fetches http:// URLs with SiteHttpFetcher."""

    def get_fetcher(self, url):
        """Return the fetcher to use for url.

        http:// URLs get a SiteHttpFetcher; every other URL is handed to
        the parent class implementation.
        """
        if not url.startswith('http://'):
            return super(PackageManager, self).get_fetcher(url)
        return SiteHttpFetcher(self, url)
973