1import io
2import os
3import re
4import abc
5import csv
6import sys
7import email
8import pathlib
9import zipfile
10import operator
11import functools
12import itertools
13import posixpath
14import collections
15
16from configparser import ConfigParser
17from contextlib import suppress
18from importlib import import_module
19from importlib.abc import MetaPathFinder
20from itertools import starmap
21
22
# Names exported by ``from <module> import *``; every entry is a class or
# function defined at module level below.
__all__ = [
    'Distribution',
    'DistributionFinder',
    'PackageNotFoundError',
    'distribution',
    'distributions',
    'entry_points',
    'files',
    'metadata',
    'requires',
    'version',
    ]
35
36
class PackageNotFoundError(ModuleNotFoundError):
    """The package was not found.

    Raised by ``Distribution.from_name`` (with the requested name as the
    sole argument) when none of the resolvers discovered on
    ``sys.meta_path`` yields a distribution for that name.
    """
39
40
class EntryPoint(
        collections.namedtuple('EntryPointBase', 'name value group')):
    """An entry point as defined by Python packaging conventions.

    See `the packaging docs on entry points
    <https://packaging.python.org/specifications/entry-points/>`_
    for more information.

    Instances are ``(name, value, group)`` named tuples.  ``value`` is
    parsed on demand with ``pattern``; the ``load``, ``module``, ``attr``
    and ``extras`` accessors raise ``AttributeError`` when ``value`` does
    not match that pattern (the match object is ``None``).
    """

    pattern = re.compile(
        r'(?P<module>[\w.]+)\s*'
        r'(:\s*(?P<attr>[\w.]+))?\s*'
        r'(?P<extras>\[.*\])?\s*$'
        )
    """
    A regular expression describing the syntax for an entry point,
    which might look like:

        - module
        - package.module
        - package.module:attribute
        - package.module:object.attribute
        - package.module:attr [extra1, extra2]

    Other combinations are possible as well.

    The expression is lenient about whitespace around the ':',
    following the attr, and following any extras.
    """

    def load(self):
        """Load the entry point from its definition. If only a module
        is indicated by the value, return that module. Otherwise,
        return the named object.
        """
        match = self.pattern.match(self.value)
        module = import_module(match.group('module'))
        # A dotted attr ("object.attribute") is resolved one step at a
        # time; filter(None, ...) drops the empty piece produced when no
        # attr was given at all.
        attrs = filter(None, (match.group('attr') or '').split('.'))
        return functools.reduce(getattr, attrs, module)

    @property
    def module(self):
        """The dotted module name from ``value``."""
        match = self.pattern.match(self.value)
        return match.group('module')

    @property
    def attr(self):
        """The (possibly dotted) attribute name from ``value``, or None."""
        match = self.pattern.match(self.value)
        return match.group('attr')

    @property
    def extras(self):
        """The extras named in ``value`` as a list of strings.

        Returns an empty list when ``value`` has no ``[...]`` section.
        """
        match = self.pattern.match(self.value)
        # findall yields the matched names themselves; finditer would
        # hand callers re.Match objects instead of the extra names.
        return re.findall(r'\w+', match.group('extras') or '')

    @classmethod
    def _from_config(cls, config):
        """Build one entry point per option of every section in *config*.

        The section name becomes the entry point's ``group``.
        """
        return [
            cls(name, value, group)
            for group in config.sections()
            for name, value in config.items(group)
            ]

    @classmethod
    def _from_text(cls, text):
        """Parse INI-style *text* into a list of entry points."""
        config = ConfigParser(delimiters='=')
        # case sensitive: https://stackoverflow.com/q/1611799/812183
        config.optionxform = str
        config.read_string(text)
        # Go through cls so subclasses get instances of their own type.
        return cls._from_config(config)

    def __iter__(self):
        """
        Supply iter so one may construct dicts of EntryPoints easily.
        """
        return iter((self.name, self))

    def __reduce__(self):
        """Pickle as a call to the class with the three tuple fields."""
        return (
            self.__class__,
            (self.name, self.value, self.group),
            )
127
128
class PackagePath(pathlib.PurePosixPath):
    """A POSIX-style path naming a file inside a package.

    ``locate`` relies on a ``dist`` attribute that this class never sets
    itself; it must be assigned on the instance before use.
    """

    def read_text(self, encoding='utf-8'):
        """Return the located file's content decoded with *encoding*."""
        target = self.locate()
        with target.open(encoding=encoding) as handle:
            content = handle.read()
        return content

    def read_binary(self):
        """Return the located file's content as bytes."""
        target = self.locate()
        with target.open('rb') as handle:
            payload = handle.read()
        return payload

    def locate(self):
        """Return a path-like object for this path"""
        owner = self.dist
        return owner.locate_file(self)
143
144
class FileHash:
    """Holds the two halves of a ``mode=value`` hash specification."""

    def __init__(self, spec):
        """Split *spec* at its first '=' into ``mode`` and ``value``.

        When *spec* contains no '=', ``mode`` is the whole string and
        ``value`` is empty.
        """
        pieces = spec.partition('=')
        self.mode = pieces[0]
        self.value = pieces[2]

    def __repr__(self):
        template = '<FileHash mode: {} value: {}>'
        return template.format(self.mode, self.value)
151
152
class Distribution:
    """A Python distribution package.

    Subclasses provide ``read_text`` and ``locate_file``; the metadata,
    version, entry points, file list and requirements exposed here are
    all derived from the text ``read_text`` returns.

    NOTE: the two methods are marked abstract, but this class does not
    use an ABC metaclass, so the markers are not enforced at
    instantiation time.
    """

    @abc.abstractmethod
    def read_text(self, filename):
        """Attempt to load metadata file given by the name.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path):
        """
        Given a path to a file in this distribution, return a path
        to it.
        """

    @classmethod
    def from_name(cls, name):
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
            package, if found.
        :raises PackageNotFoundError: When the named package's distribution
            metadata cannot be found.
        """
        for resolver in cls._discover_resolvers():
            dists = resolver(DistributionFinder.Context(name=name))
            # Only the first distribution a resolver offers is considered.
            dist = next(iter(dists), None)
            if dist is not None:
                return dist
        raise PackageNotFoundError(name)

    @classmethod
    def discover(cls, **kwargs):
        """Return an iterable of Distribution objects for all packages.

        Pass a ``context`` or pass keyword arguments for constructing
        a context.

        :context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for all packages.
        :raises ValueError: When both ``context`` and other keyword
            arguments are supplied.
        """
        context = kwargs.pop('context', None)
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        context = context or DistributionFinder.Context(**kwargs)
        return itertools.chain.from_iterable(
            resolver(context)
            for resolver in cls._discover_resolvers()
            )

    @staticmethod
    def at(path):
        """Return a Distribution for the indicated metadata path

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        return PathDistribution(pathlib.Path(path))

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers."""
        # Finders without a find_distributions attribute contribute None,
        # which the filter below discards.
        declared = (
            getattr(finder, 'find_distributions', None)
            for finder in sys.meta_path
            )
        return filter(None, declared)

    @classmethod
    def _local(cls, root='.'):
        """Build the project at *root* and return its distribution.

        Requires the third-party ``pep517`` package, imported lazily so
        that the rest of the class works without it.
        """
        from pep517 import build, meta
        system = build.compat_system(root)
        builder = functools.partial(
            meta.build,
            source_dir=root,
            system=system,
            )
        return PathDistribution(zipfile.Path(meta.build_as_zip(builder)))

    @property
    def metadata(self):
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata.  See PEP 566 for details.
        """
        text = (
            self.read_text('METADATA')
            or self.read_text('PKG-INFO')
            # This last clause is here to support old egg-info files.  Its
            # effect is to just end up using the PathDistribution's self._path
            # (which points to the egg-info file) attribute unchanged.
            or self.read_text('')
            )
        return email.message_from_string(text)

    @property
    def version(self):
        """Return the 'Version' metadata for the distribution package."""
        return self.metadata['Version']

    @property
    def entry_points(self):
        """Entry points parsed from this distribution's entry_points.txt."""
        return EntryPoint._from_text(self.read_text('entry_points.txt'))

    @property
    def files(self):
        """Files in this distribution.

        :return: List of PackagePath for this distribution or None

        Result is `None` if the metadata file that enumerates files
        (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is
        missing.
        Result may be empty if the metadata exists but is empty.
        """
        # Compare against None rather than relying on truthiness: an
        # existing-but-empty metadata file reads as '' and must produce an
        # empty list, not fall through as though the file were missing.
        file_lines = self._read_files_distinfo()
        if file_lines is None:
            file_lines = self._read_files_egginfo()
        if file_lines is None:
            return None

        def make_file(name, hash_spec=None, size_str=None):
            result = PackagePath(name)
            result.hash = FileHash(hash_spec) if hash_spec else None
            result.size = int(size_str) if size_str else None
            result.dist = self
            return result

        # csv.reader yields an empty row for a blank line; skip those
        # because make_file needs at least a name.
        rows = filter(None, csv.reader(file_lines))
        return list(starmap(make_file, rows))

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD
        """
        text = self.read_text('RECORD')
        return text and text.splitlines()

    def _read_files_egginfo(self):
        """
        SOURCES.txt might contain literal commas, so wrap each line
        in quotes.
        """
        text = self.read_text('SOURCES.txt')
        return text and map('"{}"'.format, text.splitlines())

    @property
    def requires(self):
        """Generated requirements specified for this Distribution"""
        reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        return reqs and list(reqs)

    def _read_dist_info_reqs(self):
        """Return every 'Requires-Dist' metadata value, or None if absent."""
        return self.metadata.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        """Return requirements derived from requires.txt, if it has text."""
        source = self.read_text('requires.txt')
        return source and self._deps_from_requires_text(source)

    @classmethod
    def _deps_from_requires_text(cls, source):
        """Convert the text of a requires.txt into requirement strings."""
        section_pairs = cls._read_sections(source.splitlines())
        # groupby only merges adjacent items, which matches how lines
        # follow their section header in the file.
        sections = {
            section: list(map(operator.itemgetter('line'), results))
            for section, results in
            itertools.groupby(section_pairs, operator.itemgetter('section'))
            }
        return cls._convert_egg_info_reqs_to_simple_reqs(sections)

    @staticmethod
    def _read_sections(lines):
        """Yield a ``{'section': ..., 'line': ...}`` dict per content line.

        Empty lines are skipped.  ``section`` is the text inside the most
        recent ``[...]`` header, or None before any header is seen.
        """
        section = None
        for line in filter(None, lines):
            section_match = re.match(r'\[(.*)\]$', line)
            if section_match:
                section = section_match.group(1)
                continue
            # Yield exactly the two keys consumers read, instead of
            # exposing the whole frame via locals().
            yield {'section': section, 'line': line}

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.
        """
        def make_condition(name):
            return name and 'extra == "{name}"'.format(name=name)

        def parse_condition(section):
            section = section or ''
            extra, sep, markers = section.partition(':')
            # Parenthesize the markers only when they are about to be
            # combined with an extra condition.
            if extra and markers:
                markers = '({markers})'.format(markers=markers)
            conditions = list(filter(None, [markers, make_condition(extra)]))
            return '; ' + ' and '.join(conditions) if conditions else ''

        for section, deps in sections.items():
            for dep in deps:
                yield dep + parse_condition(section)
358
359
class DistributionFinder(MetaPathFinder):
    """
    A meta path finder that can also discover installed distributions.
    """

    class Context:
        """
        The search parameters a caller hands to ``distributions()``
        or ``Distribution.discover()`` to narrow which distributions
        the DistributionFinders report.

        A finder may look for any parameter it likes, and should
        honor the canonical parameters defined below when
        appropriate.
        """

        name = None
        """
        The specific name a distribution finder should match.
        ``None`` matches all distributions.
        """

        def __init__(self, **kwargs):
            # Store every keyword verbatim in the instance dictionary.
            self.__dict__.update(kwargs)

        @property
        def path(self):
            """
            The path that a distribution finder should search.

            This is the ``path`` keyword given at construction, or
            ``sys.path`` when none was given.
            """
            try:
                return self.__dict__['path']
            except KeyError:
                return sys.path

    @abc.abstractmethod
    def find_distributions(self, context=Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching the ``context``,
        a DistributionFinder.Context instance.
        """
405
406
class FastPath:
    """
    Lightweight helper that lists the entries under a root and picks
    out the ones matching a prepared search.
    """

    def __init__(self, root):
        self.root = root
        # Lower-cased final component of root, compared by is_egg.
        self.base = os.path.basename(root).lower()

    def joinpath(self, child):
        """Return *child* joined onto the root as a ``pathlib.Path``."""
        return pathlib.Path(self.root, child)

    def children(self):
        """Return the names directly under the root.

        The root is tried as a directory first and as a zip file second;
        any exception from either attempt is swallowed, and an empty list
        is returned when both fail.
        """
        attempts = (
            lambda: os.listdir(self.root or ''),
            self.zip_children,
            )
        for attempt in attempts:
            with suppress(Exception):
                return attempt()
        return []

    def zip_children(self):
        """Return the top-level names of the zip file at the root.

        As a side effect, ``joinpath`` on this instance is rebound to the
        zip path's ``joinpath``.
        """
        archive = zipfile.Path(self.root)
        members = archive.root.namelist()
        self.joinpath = archive.joinpath

        # A dict keeps the first component of each member unique while
        # preserving first-seen order.
        return {
            member.split(posixpath.sep, 1)[0]: None
            for member in members
            }

    def is_egg(self, search):
        """Tell whether the root's own name looks like an egg for *search*."""
        own_name = self.base
        if own_name == search.versionless_egg_name:
            return True
        return own_name.startswith(search.prefix) and own_name.endswith('.egg')

    def search(self, name):
        """Yield the joined path of every child matching *name*.

        *name* is an object exposing ``exact_matches``, ``prefix``,
        ``suffixes`` and ``versionless_egg_name``.
        """
        for entry in self.children():
            lowered = entry.lower()
            if lowered in name.exact_matches:
                wanted = True
            elif (lowered.startswith(name.prefix)
                    and lowered.endswith(name.suffixes)):
                wanted = True
            else:
                # legacy case:
                wanted = self.is_egg(name) and lowered == 'egg-info'
            if wanted:
                yield self.joinpath(entry)
453
454
class Prepared:
    """
    Precomputed match strings for a metadata search on a package that
    may or may not be named.

    The class-level defaults below stay in effect when the name is None.
    """
    suffixes = '.dist-info', '.egg-info'
    normalized = ''
    prefix = ''
    versionless_egg_name = ''
    exact_matches = []

    def __init__(self, name):
        self.name = name
        if name is not None:
            # Lower-case the name and turn dashes into underscores.
            canonical = name.lower().replace('-', '_')
            self.normalized = canonical
            self.prefix = canonical + '-'
            self.versionless_egg_name = canonical + '.egg'
            self.exact_matches = [
                canonical + ending for ending in self.suffixes]
474
475
class MetadataPathFinder(DistributionFinder):
    """Finder that looks for metadata entries along ``context.path``."""

    @classmethod
    def find_distributions(cls, context=DistributionFinder.Context()):
        """
        Find distributions.

        Return an iterable of Distribution instances, one for each
        metadata path found for ``context.name`` (or for every name
        when it is ``None``) along the entries of ``context.path``.
        """
        located = cls._search_paths(context.name, context.path)
        return (PathDistribution(entry) for entry in located)

    @classmethod
    def _search_paths(cls, name, paths):
        """Find metadata directories in paths heuristically."""
        def matches_under(root):
            # One FastPath and one Prepared search per path entry.
            return FastPath(root).search(Prepared(name))

        return itertools.chain.from_iterable(map(matches_under, paths))
497
498
class PathDistribution(Distribution):
    """Distribution whose metadata lives under a path-like object."""

    def __init__(self, path):
        """Construct a distribution from a path to the metadata directory.

        :param path: A pathlib.Path or similar object supporting
                     .joinpath(), __div__, .parent, and .read_text().
        """
        self._path = path

    def read_text(self, filename):
        # A missing or unreadable file is reported as None, not raised.
        try:
            target = self._path.joinpath(filename)
            return target.read_text(encoding='utf-8')
        except (FileNotFoundError, IsADirectoryError, KeyError,
                NotADirectoryError, PermissionError):
            return None
    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path):
        """Return *path* resolved against the metadata path's parent."""
        container = self._path.parent
        return container / path
516
517
def distribution(distribution_name):
    """Look up the ``Distribution`` for a package by name.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof).
    """
    found = Distribution.from_name(distribution_name)
    return found
525
526
def distributions(**kwargs):
    """Discover ``Distribution`` instances in the current environment.

    All keyword arguments are forwarded to ``Distribution.discover``.

    :return: An iterable of ``Distribution`` instances.
    """
    discovered = Distribution.discover(**kwargs)
    return discovered
533
534
def metadata(distribution_name):
    """Return the parsed metadata of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: An email.Message containing the parsed metadata.
    """
    dist = Distribution.from_name(distribution_name)
    return dist.metadata
542
543
def version(distribution_name):
    """Return the version string of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    dist = distribution(distribution_name)
    return dist.version
552
553
def entry_points():
    """Collect the EntryPoint objects of every installed package.

    :return: A dict mapping each group name to a tuple of the EntryPoint
        objects in that group, with groups in sorted order.
    """
    collected = []
    for dist in distributions():
        collected.extend(dist.entry_points)
    # The sort is stable, so members of a group keep discovery order.
    collected.sort(key=operator.attrgetter('group'))

    buckets = {}
    for entry in collected:
        buckets.setdefault(entry.group, []).append(entry)
    return {
        group: tuple(members)
        for group, members in buckets.items()
        }
568
569
def files(distribution_name):
    """Return the files recorded for the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The ``files`` value of the named distribution.
    """
    dist = distribution(distribution_name)
    return dist.files
577
578
def requires(distribution_name):
    """
    Return the requirements of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The ``requires`` value of the named distribution; its
        entries are requirement strings suitable for
        packaging.requirement.Requirement.
    """
    dist = distribution(distribution_name)
    return dist.requires
587