1#pylint: disable-msg=C0111
2
3"""
4This module defines the BasePackageManager Class which provides an
5implementation of the packaging system API providing methods to fetch,
6upload and remove packages. Site specific extensions to any of these methods
7should inherit this class.
8"""
9
10import fcntl, logging, os, re, shutil
11from autotest_lib.client.bin import os_dep
12from autotest_lib.client.common_lib import error, utils, global_config
13
14
# The name of the checksum file that stores the packages' checksums. The
# file is kept under this name in the package manager's local directory
# (pkgmgr_dir) and is fetched from / uploaded to the repositories under the
# same name.
CHECKSUM_FILE = "packages.checksum"
17
18
def has_pbzip2():
    '''
    Report whether the parallel bzip2 command (pbzip2) can be located.

    os_dep.command() is asked to look up 'pbzip2'; a ValueError raised by
    that lookup is taken to mean the tool is not available.

    @return: True if the lookup succeeded, False if it raised ValueError.
    '''
    try:
        os_dep.command('pbzip2')
    except ValueError:
        available = False
    else:
        available = True
    return available
26
27
# Is parallel bzip2 available for use? Probed once, when this module is
# imported, by calling has_pbzip2().
_PBZIP2_AVAILABLE = has_pbzip2()
30
31
def parse_ssh_path(repo):
    '''
    Split an ssh repository location of the form ssh://xx@xx/path/to/.

    @param repo: The repository location string.

    @return: A (host_line, remote_path) tuple. host_line is the text between
            'ssh://' and the first following '/'; remote_path is the rest of
            the string, starting at that '/'.

    @raises error.PackageUploadError: if repo does not have that form.
    '''
    match = re.search('^ssh://(.*?)(/.*)$', repo)
    if not match:
        raise error.PackageUploadError(
            "Incorrect SSH path in global_config: %s" % repo)

    host_line = match.group(1)
    remote_path = match.group(2)
    return (host_line, remote_path)
44
45
def repo_run_command(repo, cmd, ignore_status=False, cd=True):
    """
    Run a command relative to the repos path.

    For an ssh:// repository the command is wrapped in an ssh invocation on
    the repository host; for anything else it is run locally and repo is
    treated as a directory path.

    @param repo: Repository location, either ssh://[user@]host/path or a
            local directory path. Surrounding whitespace is stripped.
    @param cmd: The shell command to run.
    @param ignore_status: Passed through to utils.run().
    @param cd: If True, the command is prefixed with a 'cd' into the
            repository directory.

    @return: The value returned by utils.run(), or None if the command line
            that was built is empty (nothing is run in that case).

    @raises error.PackageUploadError: if an ssh:// repo cannot be parsed.
    """
    repo = repo.strip()
    if repo.startswith('ssh://'):
        hostline, remote_path = parse_ssh_path(repo)
        cd_str = ('cd %s && ' % remote_path) if cd else ''
        # hostline is already either 'host' or 'user@host', which is exactly
        # what ssh expects as its destination. Using it as-is also covers
        # the case without a user name, which used to reference an
        # unassigned 'host' variable.
        run_cmd = 'ssh %s "%s%s"' % (hostline, cd_str, cmd)
    else:
        cd_str = ('cd %s && ' % repo) if cd else ''
        run_cmd = "%s%s" % (cd_str, cmd)

    if run_cmd:
        return utils.run(run_cmd, ignore_status=ignore_status)
    return None
69
70
def create_directory(repo):
    """
    Create the repository directory (and any missing parents) with mkdir -p.

    @param repo: Repository location. For an ssh:// location the path part
            is created on the remote host; otherwise repo itself is used as
            the directory to create.
    """
    if repo.startswith('ssh://'):
        remote_path = parse_ssh_path(repo)[1]
    else:
        remote_path = repo
    # cd=False: the directory may not exist yet, so do not cd into it first
    repo_run_command(repo, 'mkdir -p %s' % remote_path, cd=False)
76
77
def check_diskspace(repo, min_free=None):
    """
    Verify that the repository has at least min_free gigabytes available.

    @param repo: Repository location (local path or ssh:// location).
    @param min_free: Minimum number of free gigabytes required. If None, the
            'minimum_free_space' value of the PACKAGES section of the global
            config is used (default 1).

    @raises error.RepoUnknownError: if running df or parsing its output
            fails for any reason.
    @raises error.RepoDiskFullError: if less than min_free GB is available.
    """
    # Note: 1 GB = 10**9 bytes (SI unit).
    if min_free is None:
        min_free = global_config.global_config.get_config_value(
            'PACKAGES', 'minimum_free_space', type=int, default=1)
    try:
        # df reports in blocks of 10**9 bytes; only the last output line
        # (the one describing the repository's filesystem) is parsed.
        df = repo_run_command(repo,
                              'df -PB %d . | tail -1' % 10 ** 9).stdout.split()
        # fourth column of the df output line
        free_space_gb = int(df[3])
    except Exception as e:
        raise error.RepoUnknownError('Unknown Repo Error: %s' % e)
    if free_space_gb < min_free:
        raise error.RepoDiskFullError('Not enough disk space available '
                                      '%sg < %sg' % (free_space_gb, min_free))
93
94
def check_write(repo):
    """
    Verify that the repository is writable by creating and then removing a
    scratch file ('.repo_test_file') in it.

    @param repo: Repository location (local path or ssh:// location).

    @raises error.RepoWriteError: if either command fails with a CmdError.
    """
    probe_file = '.repo_test_file'
    try:
        repo_run_command(repo, 'touch %s' % probe_file).stdout.strip()
        repo_run_command(repo, 'rm ' + probe_file)
    except error.CmdError:
        raise error.RepoWriteError('Unable to write to ' + repo)
102
103
def trim_custom_directories(repo, older_than_days=None):
    """
    Delete old files from a custom package repository.

    Runs find inside the repository and removes every regular file matched
    by '-atime +<older_than_days>'. Failures of the command are ignored.

    @param repo: Repository location; nothing is done if it is empty.
    @param older_than_days: Age threshold in days. If None, the
            'custom_max_age' value of the PACKAGES section of the global
            config is used (default 40).
    """
    if repo:
        if older_than_days is None:
            older_than_days = global_config.global_config.get_config_value(
                'PACKAGES', 'custom_max_age', type=int, default=40)
        cmd = ('find . -type f -atime +%s -exec rm -f {} \\;'
               % older_than_days)
        repo_run_command(repo, cmd, ignore_status=True)
113
114
class RepositoryFetcher(object):
    """
    Interface of the objects used to fetch package files from a repository.

    The fetchers defined in this file subclass it, set url in their
    constructor and override fetch_pkg_file().
    """
    # location of the repository this fetcher reads from
    url = None


    def fetch_pkg_file(self, filename, dest_path):
        """
        Fetch one package file from the package repository.

        @param filename: Name of the package file inside the repository.
        @param dest_path: Path the file should be downloaded to.

        @raises PackageFetchError if the fetch failed
        """
        raise NotImplementedError
128
129
class HttpFetcher(RepositoryFetcher):
    """Fetches package files from an http repository using wget."""

    # wget command line, filled in with (source URL, destination path)
    wget_cmd_pattern = 'wget --connect-timeout=15 -nv %s -O %s'


    def __init__(self, package_manager, repository_url):
        """
        @param package_manager: The package manager whose _run_command
                wrapper is used to execute every shell command.
        @param repository_url: The base URL of the http repository
        """
        self.run_command = package_manager._run_command
        self.url = repository_url


    def _quick_http_test(self):
        """ Run a simple wget with a 30 second timeout on the repository to
        see if it is reachable. This avoids having to wait for the much
        longer timeout used for the actual package fetch.

        @raises PackageFetchError if the repository could not be contacted
        """
        # just make a temp file name to write a test fetch into
        mktemp = 'mktemp -u /tmp/tmp.XXXXXX'
        dest_file_path = self.run_command(mktemp).stdout.strip()

        try:
            # build up a wget command
            http_cmd = self.wget_cmd_pattern % (self.url, dest_file_path)
            try:
                self.run_command(http_cmd, _run_command_dargs={'timeout': 30})
            except Exception as e:
                msg = 'HTTP test failed, unable to contact %s: %s'
                raise error.PackageFetchError(msg % (self.url, e))
        finally:
            # the test download is never needed, whatever the outcome
            self.run_command('rm -rf %s' % dest_file_path)


    def fetch_pkg_file(self, filename, dest_path):
        """ Download a package file from the http repository with wget.

        The repository is probed with _quick_http_test() first. After the
        download the presence of dest_path is checked with ls; a missing
        file is treated like a failed wget.

        @param filename: The filename of the package file to fetch.
        @param dest_path: Destination path to download the file to.

        @raises PackageFetchError if the repository is unreachable or the
                download failed
        """
        logging.info('Fetching %s from %s to %s', filename, self.url,
                     dest_path)

        # do a quick test to verify the repo is reachable
        self._quick_http_test()

        # try to retrieve the package via http
        package_url = os.path.join(self.url, filename)
        try:
            cmd = self.wget_cmd_pattern % (package_url, dest_path)
            result = self.run_command(cmd,
                                      _run_command_dargs={'timeout': 1200})

            file_exists = self.run_command(
                'ls %s' % dest_path,
                _run_command_dargs={'ignore_status': True}).exit_status == 0
            if not file_exists:
                logging.error('wget failed: %s', result)
                # handled by the except clause below, like a wget failure
                raise error.CmdError(cmd, result)

            logging.debug('Successfully fetched %s from %s', filename,
                          package_url)
        except error.CmdError as e:
            # remove whatever junk was retrieved when the get failed
            self.run_command('rm -f %s' % dest_path)

            raise error.PackageFetchError('%s not found in %s\n%s'
                    'wget error code: %d' % (filename, package_url,
                    e.result_obj.stderr, e.result_obj.exit_status))
192
193
class LocalFilesystemFetcher(RepositoryFetcher):
    """Fetches package files from a repository that is a plain directory."""

    def __init__(self, package_manager, local_dir):
        """
        @param package_manager: The package manager whose _run_command
                wrapper is used to execute the copy command.
        @param local_dir: The directory holding the package files
        """
        self.run_command = package_manager._run_command
        self.url = local_dir


    def fetch_pkg_file(self, filename, dest_path):
        """ Copy a package file out of the repository directory with cp.

        @param filename: The filename of the package file to fetch.
        @param dest_path: Destination path to copy the file to.

        @raises PackageFetchError if the copy command failed
        """
        logging.info('Fetching %s from %s to %s', filename, self.url,
                     dest_path)
        local_path = os.path.join(self.url, filename)
        try:
            self.run_command('cp %s %s' % (local_path, dest_path))
            logging.debug('Successfully fetched %s from %s', filename,
                          local_path)
        except error.CmdError as e:
            raise error.PackageFetchError(
                'Package %s could not be fetched from %s'
                % (filename, self.url), e)
212
213
214class BasePackageManager(object):
215    def __init__(self, pkgmgr_dir, hostname=None, repo_urls=None,
216                 upload_paths=None, do_locking=True, run_function=utils.run,
217                 run_function_args=[], run_function_dargs={}):
218        '''
219        repo_urls: The list of the repository urls which is consulted
220                   whilst fetching the package
221        upload_paths: The list of the upload of repositories to which
222                      the package is uploaded to
223        pkgmgr_dir : A directory that can be used by the package manager
224                      to dump stuff (like checksum files of the repositories
225                      etc.).
226        do_locking : Enable locking when the packages are installed.
227
228        run_function is used to execute the commands throughout this file.
229        It defaults to utils.run() but a custom method (if provided) should
230        be of the same schema as utils.run. It should return a CmdResult
231        object and throw a CmdError exception. The reason for using a separate
232        function to run the commands is that the same code can be run to fetch
233        a package on the local machine or on a remote machine (in which case
234        ssh_host's run function is passed in for run_function).
235        '''
236        # In memory dictionary that stores the checksum's of packages
237        self._checksum_dict = {}
238
239        self.pkgmgr_dir = pkgmgr_dir
240        self.do_locking = do_locking
241        self.hostname = hostname
242        self.repositories = []
243
244        # Create an internal function that is a simple wrapper of
245        # run_function and takes in the args and dargs as arguments
246        def _run_command(command, _run_command_args=run_function_args,
247                         _run_command_dargs={}):
248            '''
249            Special internal function that takes in a command as
250            argument and passes it on to run_function (if specified).
251            The _run_command_dargs are merged into run_function_dargs
252            with the former having more precedence than the latter.
253            '''
254            new_dargs = dict(run_function_dargs)
255            new_dargs.update(_run_command_dargs)
256            # avoid polluting logs with extremely verbose packaging output
257            new_dargs.update({'stdout_tee' : None})
258
259            return run_function(command, *_run_command_args,
260                                **new_dargs)
261
262        self._run_command = _run_command
263
264        # Process the repository URLs
265        if not repo_urls:
266            repo_urls = []
267        elif hostname:
268            repo_urls = self.get_mirror_list(repo_urls)
269        for url in repo_urls:
270            self.add_repository(url)
271
272        # Process the upload URLs
273        if not upload_paths:
274            self.upload_paths = []
275        else:
276            self.upload_paths = list(upload_paths)
277
278
279    def add_repository(self, repo):
280        if isinstance(repo, basestring):
281            self.repositories.append(self.get_fetcher(repo))
282        elif isinstance(repo, RepositoryFetcher):
283            self.repositories.append(repo)
284        else:
285            raise TypeError("repo must be RepositoryFetcher or url string")
286
287
288    def get_fetcher(self, url):
289        if url.startswith('http://'):
290            return HttpFetcher(self, url)
291        else:
292            return LocalFilesystemFetcher(self, url)
293
294
295    def repo_check(self, repo):
296        '''
297        Check to make sure the repo is in a sane state:
298        ensure we have at least XX amount of free space
299        Make sure we can write to the repo
300        '''
301        if not repo.startswith('/') and not repo.startswith('ssh:'):
302            return
303        try:
304            create_directory(repo)
305            check_diskspace(repo)
306            check_write(repo)
307        except (error.RepoWriteError, error.RepoUnknownError,
308                error.RepoDiskFullError), e:
309            raise error.RepoError("ERROR: Repo %s: %s" % (repo, e))
310
311
312    def upkeep(self, custom_repos=None):
313        '''
314        Clean up custom upload/download areas
315        '''
316        from autotest_lib.server import subcommand
317        if not custom_repos:
318            # Not all package types necessarily require or allow custom repos
319            try:
320                custom_repos = global_config.global_config.get_config_value(
321                    'PACKAGES', 'custom_upload_location').split(',')
322            except global_config.ConfigError:
323                custom_repos = []
324            try:
325                custom_download = global_config.global_config.get_config_value(
326                    'PACKAGES', 'custom_download_location')
327                custom_repos += [custom_download]
328            except global_config.ConfigError:
329                pass
330
331            if not custom_repos:
332                return
333
334        subcommand.parallel_simple(trim_custom_directories, custom_repos,
335                                   log=False)
336
337
338    def install_pkg(self, name, pkg_type, fetch_dir, install_dir,
339                    preserve_install_dir=False, repo_url=None):
340        '''
341        Remove install_dir if it already exists and then recreate it unless
342        preserve_install_dir is specified as True.
343        Fetch the package into the pkg_dir. Untar the package into install_dir
344        The assumption is that packages are of the form :
345        <pkg_type>.<pkg_name>.tar.bz2
346        name        : name of the package
347        type        : type of the package
348        fetch_dir   : The directory into which the package tarball will be
349                      fetched to.
350        install_dir : the directory where the package files will be untarred to
351        repo_url    : the url of the repository to fetch the package from.
352        '''
353
354        # do_locking flag is on by default unless you disable it (typically
355        # in the cases where packages are directly installed from the server
356        # onto the client in which case fcntl stuff wont work as the code
357        # will run on the server in that case..
358        if self.do_locking:
359            lockfile_name = '.%s-%s-lock' % (name, pkg_type)
360            lockfile = open(os.path.join(self.pkgmgr_dir, lockfile_name), 'w')
361
362        try:
363            if self.do_locking:
364                fcntl.flock(lockfile, fcntl.LOCK_EX)
365
366            self._run_command('mkdir -p %s' % fetch_dir)
367
368            pkg_name = self.get_tarball_name(name, pkg_type)
369            fetch_path = os.path.join(fetch_dir, pkg_name)
370            try:
371                # Fetch the package into fetch_dir
372                self.fetch_pkg(pkg_name, fetch_path, use_checksum=True)
373
374                # check to see if the install_dir exists and if it does
375                # then check to see if the .checksum file is the latest
376                install_dir_exists = False
377                try:
378                    self._run_command("ls %s" % install_dir)
379                    install_dir_exists = True
380                except (error.CmdError, error.AutoservRunError):
381                    pass
382
383                if (install_dir_exists and
384                    not self.untar_required(fetch_path, install_dir)):
385                    return
386
387                # untar the package into install_dir and
388                # update the checksum in that directory
389                if not preserve_install_dir:
390                    # Make sure we clean up the install_dir
391                    self._run_command('rm -rf %s' % install_dir)
392                self._run_command('mkdir -p %s' % install_dir)
393
394                self.untar_pkg(fetch_path, install_dir)
395
396            except error.PackageFetchError, why:
397                raise error.PackageInstallError(
398                    'Installation of %s(type:%s) failed : %s'
399                    % (name, pkg_type, why))
400        finally:
401            if self.do_locking:
402                fcntl.flock(lockfile, fcntl.LOCK_UN)
403                lockfile.close()
404
405
406    def fetch_pkg(self, pkg_name, dest_path, repo_url=None, use_checksum=False):
407        '''
408        Fetch the package into dest_dir from repo_url. By default repo_url
409        is None and the package is looked in all the repositories specified.
410        Otherwise it fetches it from the specific repo_url.
411        pkg_name     : name of the package (ex: test-sleeptest.tar.bz2,
412                                            dep-gcc.tar.bz2, kernel.1-1.rpm)
413        repo_url     : the URL of the repository where the package is located.
414        dest_path    : complete path of where the package will be fetched to.
415        use_checksum : This is set to False to fetch the packages.checksum file
416                       so that the checksum comparison is bypassed for the
417                       checksum file itself. This is used internally by the
418                       packaging system. It should be ignored by externals
419                       callers of this method who use it fetch custom packages.
420        '''
421
422        try:
423            self._run_command("ls %s" % os.path.dirname(dest_path))
424        except (error.CmdError, error.AutoservRunError):
425            raise error.PackageFetchError("Please provide a valid "
426                                          "destination: %s " % dest_path)
427
428        # See if the package was already fetched earlier, if so
429        # the checksums need to be compared and the package is now
430        # fetched only if they differ.
431        pkg_exists = False
432        try:
433            self._run_command("ls %s" % dest_path)
434            pkg_exists = True
435        except (error.CmdError, error.AutoservRunError):
436            pass
437
438        # if a repository location is explicitly provided, fetch the package
439        # from there and return
440        if repo_url:
441            repositories = [self.get_fetcher(repo_url)]
442        elif self.repositories:
443            repositories = self.repositories
444        else:
445            raise error.PackageFetchError("No repository urls specified")
446
447        # install the package from the package repos, try the repos in
448        # reverse order, assuming that the 'newest' repos are most desirable
449        for fetcher in reversed(repositories):
450            try:
451                # Fetch the package if it is not there, the checksum does
452                # not match, or checksums are disabled entirely
453                need_to_fetch = (
454                        not use_checksum or not pkg_exists
455                        or not self.compare_checksum(dest_path))
456                if need_to_fetch:
457                    fetcher.fetch_pkg_file(pkg_name, dest_path)
458                    # update checksum so we won't refetch next time.
459                    if use_checksum:
460                        self.update_checksum(dest_path)
461                return
462            except (error.PackageFetchError, error.AutoservRunError) as e:
463                # The package could not be found in this repo, continue looking
464                logging.debug(e)
465
466        repo_url_list = [repo.url for repo in repositories]
467        message = ('%s could not be fetched from any of the repos %s' %
468                   (pkg_name, repo_url_list))
469        logging.error(message)
470        # if we got here then that means the package is not found
471        # in any of the repositories.
472        raise error.PackageFetchError(message)
473
474
475    def upload_pkg(self, pkg_path, upload_path=None, update_checksum=False,
476                   timeout=300):
477        from autotest_lib.server import subcommand
478        if upload_path:
479            upload_path_list = [upload_path]
480            self.upkeep(upload_path_list)
481        elif len(self.upload_paths) > 0:
482            self.upkeep()
483            upload_path_list = self.upload_paths
484        else:
485            raise error.PackageUploadError("Invalid Upload Path specified")
486
487        if update_checksum:
488            # get the packages' checksum file and update it with the current
489            # package's checksum
490            self.update_checksum(pkg_path)
491
492        commands = []
493        for path in upload_path_list:
494            commands.append(subcommand.subcommand(self.upload_pkg_parallel,
495                                                  (pkg_path, path,
496                                                   update_checksum)))
497
498        results = subcommand.parallel(commands, timeout, return_results=True)
499        for result in results:
500            if result:
501                print str(result)
502
503
504    # TODO(aganti): Fix the bug with the current checksum logic where
505    # packages' checksums that are not present consistently in all the
506    # repositories are not handled properly. This is a corner case though
507    # but the ideal solution is to make the checksum file repository specific
508    # and then maintain it.
509    def upload_pkg_parallel(self, pkg_path, upload_path, update_checksum=False):
510        '''
511        Uploads to a specified upload_path or to all the repos.
512        Also uploads the checksum file to all the repos.
513        pkg_path        : The complete path to the package file
514        upload_path     : the absolute path where the files are copied to.
515                          if set to 'None' assumes 'all' repos
516        update_checksum : If set to False, the checksum file is not
517                          going to be updated which happens by default.
518                          This is necessary for custom
519                          packages (like custom kernels and custom tests)
520                          that get uploaded which do not need to be part of
521                          the checksum file and bloat it.
522        '''
523        self.repo_check(upload_path)
524        # upload the package
525        if os.path.isdir(pkg_path):
526            self.upload_pkg_dir(pkg_path, upload_path)
527        else:
528            self.upload_pkg_file(pkg_path, upload_path)
529            if update_checksum:
530                self.upload_pkg_file(self._get_checksum_file_path(),
531                                     upload_path)
532
533
534    def upload_pkg_file(self, file_path, upload_path):
535        '''
536        Upload a single file. Depending on the upload path, the appropriate
537        method for that protocol is called. Currently this simply copies the
538        file to the target directory (but can be extended for other protocols)
539        This assumes that the web server is running on the same machine where
540        the method is being called from. The upload_path's files are
541        basically served by that web server.
542        '''
543        try:
544            if upload_path.startswith('ssh://'):
545                # parse ssh://user@host/usr/local/autotest/packages
546                hostline, remote_path = parse_ssh_path(upload_path)
547                try:
548                    utils.run('scp %s %s:%s' % (file_path, hostline,
549                                                remote_path))
550                    r_path = os.path.join(remote_path,
551                                          os.path.basename(file_path))
552                    utils.run("ssh %s 'chmod 644 %s'" % (hostline, r_path))
553                except error.CmdError:
554                    logging.error("Error uploading to repository %s",
555                                  upload_path)
556            else:
557                shutil.copy(file_path, upload_path)
558                os.chmod(os.path.join(upload_path,
559                                      os.path.basename(file_path)), 0644)
560        except (IOError, os.error), why:
561            logging.error("Upload of %s to %s failed: %s", file_path,
562                          upload_path, why)
563
564
565    def upload_pkg_dir(self, dir_path, upload_path):
566        '''
567        Upload a full directory. Depending on the upload path, the appropriate
568        method for that protocol is called. Currently this copies the whole
569        tmp package directory to the target directory.
570        This assumes that the web server is running on the same machine where
571        the method is being called from. The upload_path's files are
572        basically served by that web server.
573        '''
574        local_path = os.path.join(dir_path, "*")
575        try:
576            if upload_path.startswith('ssh://'):
577                hostline, remote_path = parse_ssh_path(upload_path)
578                try:
579                    utils.run('scp %s %s:%s' % (local_path, hostline,
580                                                remote_path))
581                    ssh_path = os.path.join(remote_path, "*")
582                    utils.run("ssh %s 'chmod 644 %s'" % (hostline, ssh_path))
583                except error.CmdError:
584                    logging.error("Error uploading to repository: %s",
585                                  upload_path)
586            else:
587                utils.run("cp %s %s " % (local_path, upload_path))
588                up_path = os.path.join(upload_path, "*")
589                utils.run("chmod 644 %s" % up_path)
590        except (IOError, os.error), why:
591            raise error.PackageUploadError("Upload of %s to %s failed: %s"
592                                           % (dir_path, upload_path, why))
593
594
595    def remove_pkg(self, pkg_name, remove_path=None, remove_checksum=False):
596        '''
597        Remove the package from the specified remove_path
598        pkg_name    : name of the package (ex: test-sleeptest.tar.bz2,
599                                           dep-gcc.tar.bz2)
600        remove_path : the location to remove the package from.
601
602        '''
603        if remove_path:
604            remove_path_list = [remove_path]
605        elif len(self.upload_paths) > 0:
606            remove_path_list = self.upload_paths
607        else:
608            raise error.PackageRemoveError(
609                "Invalid path to remove the pkg from")
610
611        checksum_path = self._get_checksum_file_path()
612
613        if remove_checksum:
614            self.remove_checksum(pkg_name)
615
616        # remove the package and upload the checksum file to the repos
617        for path in remove_path_list:
618            self.remove_pkg_file(pkg_name, path)
619            self.upload_pkg_file(checksum_path, path)
620
621
622    def remove_pkg_file(self, filename, pkg_dir):
623        '''
624        Remove the file named filename from pkg_dir
625        '''
626        try:
627            # Remove the file
628            if pkg_dir.startswith('ssh://'):
629                hostline, remote_path = parse_ssh_path(pkg_dir)
630                path = os.path.join(remote_path, filename)
631                utils.run("ssh %s 'rm -rf %s/%s'" % (hostline, remote_path,
632                          path))
633            else:
634                os.remove(os.path.join(pkg_dir, filename))
635        except (IOError, os.error), why:
636            raise error.PackageRemoveError("Could not remove %s from %s: %s "
637                                           % (filename, pkg_dir, why))
638
639
640    def get_mirror_list(self, repo_urls):
641        '''
642            Stub function for site specific mirrors.
643
644            Returns:
645                Priority ordered list
646        '''
647        return repo_urls
648
649
650    def _get_checksum_file_path(self):
651        '''
652        Return the complete path of the checksum file (assumed to be stored
653        in self.pkgmgr_dir
654        '''
655        return os.path.join(self.pkgmgr_dir, CHECKSUM_FILE)
656
657
658    def _get_checksum_dict(self):
659        '''
660        Fetch the checksum file if not already fetched. If the checksum file
661        cannot be fetched from the repos then a new file is created with
662        the current package's (specified in pkg_path) checksum value in it.
663        Populate the local checksum dictionary with the values read from
664        the checksum file.
665        The checksum file is assumed to be present in self.pkgmgr_dir
666        '''
667        checksum_path = self._get_checksum_file_path()
668        if not self._checksum_dict:
669            # Fetch the checksum file
670            try:
671                try:
672                    self._run_command("ls %s" % checksum_path)
673                except (error.CmdError, error.AutoservRunError):
674                    # The packages checksum file does not exist locally.
675                    # See if it is present in the repositories.
676                    self.fetch_pkg(CHECKSUM_FILE, checksum_path)
677            except error.PackageFetchError:
678                # This should not happen whilst fetching a package..if a
679                # package is present in the repository, the corresponding
680                # checksum file should also be automatically present. This
681                # case happens only when a package
682                # is being uploaded and if it is the first package to be
683                # uploaded to the repos (hence no checksum file created yet)
684                # Return an empty dictionary in that case
685                return {}
686
687            # Read the checksum file into memory
688            checksum_file_contents = self._run_command('cat '
689                                                       + checksum_path).stdout
690
691            # Return {} if we have an empty checksum file present
692            if not checksum_file_contents.strip():
693                return {}
694
695            # Parse the checksum file contents into self._checksum_dict
696            for line in checksum_file_contents.splitlines():
697                checksum, package_name = line.split(None, 1)
698                self._checksum_dict[package_name] = checksum
699
700        return self._checksum_dict
701
702
703    def _save_checksum_dict(self, checksum_dict):
704        '''
705        Save the checksum dictionary onto the checksum file. Update the
706        local _checksum_dict variable with this new set of values.
707        checksum_dict :  New checksum dictionary
708        checksum_dir  :  The directory in which to store the checksum file to.
709        '''
710        checksum_path = self._get_checksum_file_path()
711        self._checksum_dict = checksum_dict.copy()
712        checksum_contents = '\n'.join(checksum + ' ' + pkg_name
713                                      for pkg_name, checksum in
714                                      checksum_dict.iteritems())
715        # Write the checksum file back to disk
716        self._run_command('echo "%s" > %s' % (checksum_contents,
717                                              checksum_path),
718                          _run_command_dargs={'verbose': False})
719
720
721    def compute_checksum(self, pkg_path):
722        '''
723        Compute the MD5 checksum for the package file and return it.
724        pkg_path : The complete path for the package file
725        '''
726        md5sum_output = self._run_command("md5sum %s " % pkg_path).stdout
727        return md5sum_output.split()[0]
728
729
730    def update_checksum(self, pkg_path):
731        '''
732        Update the checksum of the package in the packages' checksum
733        file. This method is called whenever a package is fetched just
734        to be sure that the checksums in the local file are the latest.
735        pkg_path : The complete path to the package file.
736        '''
737        # Compute the new checksum
738        new_checksum = self.compute_checksum(pkg_path)
739        checksum_dict = self._get_checksum_dict()
740        checksum_dict[os.path.basename(pkg_path)] = new_checksum
741        self._save_checksum_dict(checksum_dict)
742
743
744    def remove_checksum(self, pkg_name):
745        '''
746        Remove the checksum of the package from the packages checksum file.
747        This method is called whenever a package is removed from the
748        repositories in order clean its corresponding checksum.
749        pkg_name :  The name of the package to be removed
750        '''
751        checksum_dict = self._get_checksum_dict()
752        if pkg_name in checksum_dict:
753            del checksum_dict[pkg_name]
754        self._save_checksum_dict(checksum_dict)
755
756
757    def compare_checksum(self, pkg_path):
758        '''
759        Calculate the checksum of the file specified in pkg_path and
760        compare it with the checksum in the checksum file
761        Return True if both match else return False.
762        pkg_path : The full path to the package file for which the
763                   checksum is being compared
764        '''
765        checksum_dict = self._get_checksum_dict()
766        package_name = os.path.basename(pkg_path)
767        if not checksum_dict or package_name not in checksum_dict:
768            return False
769
770        repository_checksum = checksum_dict[package_name]
771        local_checksum = self.compute_checksum(pkg_path)
772        return (local_checksum == repository_checksum)
773
774
775    def tar_package(self, pkg_name, src_dir, dest_dir, exclude_string=None):
776        '''
777        Create a tar.bz2 file with the name 'pkg_name' say test-blah.tar.bz2.
778        Excludes the directories specified in exclude_string while tarring
779        the source. Returns the tarball path.
780        '''
781        tarball_path = os.path.join(dest_dir, pkg_name)
782        temp_path = tarball_path + '.tmp'
783        cmd_list = ['tar', '-cf', temp_path, '-C', src_dir]
784        if _PBZIP2_AVAILABLE:
785            cmd_list.append('--use-compress-prog=pbzip2')
786        else:
787            cmd_list.append('-j')
788        if exclude_string is not None:
789            cmd_list.append(exclude_string)
790
791        try:
792            utils.system(' '.join(cmd_list))
793        except:
794            os.unlink(temp_path)
795            raise
796
797        os.rename(temp_path, tarball_path)
798        return tarball_path
799
800
801    def untar_required(self, tarball_path, dest_dir):
802        '''
803        Compare the checksum of the tarball_path with the .checksum file
804        in the dest_dir and return False if it matches. The untar
805        of the package happens only if the checksums do not match.
806        '''
807        checksum_path = os.path.join(dest_dir, '.checksum')
808        try:
809            existing_checksum = self._run_command('cat ' + checksum_path).stdout
810        except (error.CmdError, error.AutoservRunError):
811            # If the .checksum file is not present (generally, this should
812            # not be the case) then return True so that the untar happens
813            return True
814
815        new_checksum = self.compute_checksum(tarball_path)
816        return (new_checksum.strip() != existing_checksum.strip())
817
818
819    def untar_pkg(self, tarball_path, dest_dir):
820        '''
821        Untar the package present in the tarball_path and put a
822        ".checksum" file in the dest_dir containing the checksum
823        of the tarball. This method
824        assumes that the package to be untarred is of the form
825        <name>.tar.bz2
826        '''
827        self._run_command('tar --no-same-owner -xjf %s -C %s' %
828                          (tarball_path, dest_dir))
829        # Put the .checksum file in the install_dir to note
830        # where the package came from
831        pkg_checksum = self.compute_checksum(tarball_path)
832        pkg_checksum_path = os.path.join(dest_dir,
833                                         '.checksum')
834        self._run_command('echo "%s" > %s '
835                          % (pkg_checksum, pkg_checksum_path))
836
837
838    @staticmethod
839    def get_tarball_name(name, pkg_type):
840        """Converts a package name and type into a tarball name.
841
842        @param name: The name of the package
843        @param pkg_type: The type of the package
844
845        @returns A tarball filename for that specific type of package
846        """
847        assert '-' not in pkg_type
848        return '%s-%s.tar.bz2' % (pkg_type, name)
849
850
851    @staticmethod
852    def parse_tarball_name(tarball_name):
853        """Coverts a package tarball name into a package name and type.
854
855        @param tarball_name: The filename of the tarball
856
857        @returns (name, pkg_type) where name is the package name and pkg_type
858            is the package type.
859        """
860        match = re.search(r'^([^-]*)-(.*)\.tar\.bz2$', tarball_name)
861        pkg_type, name = match.groups()
862        return name, pkg_type
863
864
865    def is_url(self, url):
866        """Return true if path looks like a URL"""
867        return url.startswith('http://')
868
869
870    def get_package_name(self, url, pkg_type):
871        '''
872        Extract the group and test name for the url. This method is currently
873        used only for tests.
874        '''
875        if pkg_type == 'test':
876            regex = '[^:]+://(.*)/([^/]*)$'
877            return self._get_package_name(url, regex)
878        else:
879            return ('', url)
880
881
882    def _get_package_name(self, url, regex):
883        if not self.is_url(url):
884            if url.endswith('.tar.bz2'):
885                testname = url.replace('.tar.bz2', '')
886                testname = re.sub(r'(\d*)\.', '', testname)
887                return (testname, testname)
888            else:
889                return ('', url)
890
891        match = re.match(regex, url)
892        if not match:
893            return ('', url)
894        group, filename = match.groups()
895        # Generate the group prefix.
896        group = re.sub(r'\W', '_', group)
897        # Drop the extension to get the raw test name.
898        testname = re.sub(r'\.tar\.bz2', '', filename)
899        # Drop any random numbers at the end of the test name if any
900        testname = re.sub(r'\.(\d*)', '', testname)
901        return (group, testname)
902