1import collections.abc
2import re
3from typing import (
4    Any,
5    Callable,
6    Dict,
7    List,
8    Mapping,
9    MutableMapping,
10    Optional,
11    Sequence,
12    Type,
13    Union,
14    IO,
15)
16import warnings
17from io import BytesIO
18from datetime import datetime
19from base64 import b64encode, b64decode
20from numbers import Integral
21from types import SimpleNamespace
22from functools import singledispatch
23
24from fontTools.misc import etree
25
26from fontTools.misc.py23 import tostr
27
28
29# By default, we
30#  - deserialize <data> elements as bytes and
31#  - serialize bytes as <data> elements.
32# Before, on Python 2, we
33#  - deserialized <data> elements as plistlib.Data objects, in order to
34#    distinguish them from the built-in str type (which is bytes on python2)
35#  - serialized bytes as <string> elements (they must have only contained
36#    ASCII characters in this case)
37# You can pass use_builtin_types=[True|False] to the load/dump etc. functions
38# to enforce a specific treatment.
39# NOTE that unicode type always maps to <string> element, and plistlib.Data
40# always maps to <data> element, regardless of use_builtin_types.
USE_BUILTIN_TYPES = True

# XML prolog written at the very top of a serialized plist.
XML_DECLARATION = b"<?xml version='1.0' encoding='UTF-8'?>"

# DOCTYPE line written right after the XML declaration.
PLIST_DOCTYPE = (
    b"<!DOCTYPE plist PUBLIC "
    b'"-//Apple//DTD PLIST 1.0//EN" '
    b'"http://www.apple.com/DTDs/PropertyList-1.0.dtd">'
)
49
50
51# Date should conform to a subset of ISO 8601:
52# YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'
53_date_parser = re.compile(
54    r"(?P<year>\d\d\d\d)"
55    r"(?:-(?P<month>\d\d)"
56    r"(?:-(?P<day>\d\d)"
57    r"(?:T(?P<hour>\d\d)"
58    r"(?::(?P<minute>\d\d)"
59    r"(?::(?P<second>\d\d))"
60    r"?)?)?)?)?Z",
61    re.ASCII,
62)
63
64
65def _date_from_string(s: str) -> datetime:
66    order = ("year", "month", "day", "hour", "minute", "second")
67    m = _date_parser.match(s)
68    if m is None:
69        raise ValueError(f"Expected ISO 8601 date string, but got '{s:r}'.")
70    gd = m.groupdict()
71    lst = []
72    for key in order:
73        val = gd[key]
74        if val is None:
75            break
76        lst.append(int(val))
77    # NOTE: mypy doesn't know that lst is 6 elements long.
78    return datetime(*lst)  # type:ignore
79
80
81def _date_to_string(d: datetime) -> str:
82    return "%04d-%02d-%02dT%02d:%02d:%02dZ" % (
83        d.year,
84        d.month,
85        d.day,
86        d.hour,
87        d.minute,
88        d.second,
89    )
90
91
class Data:
    """Wrapper that marks a ``bytes`` payload as plist binary data.

    Objects of this class are produced instead of plain ``bytes`` when the
    ``use_builtin_types`` argument of a loading function
    (:py:func:`fromtree`, :py:func:`load`, :py:func:`loads`) is false.

    The wrapped bytes are available through the ``data`` attribute.
    """

    def __init__(self, data: bytes) -> None:
        """Store ``data``; raise ``TypeError`` unless it is ``bytes``."""
        if isinstance(data, bytes):
            self.data = data
        else:
            raise TypeError(f"Expected bytes, found {type(data).__name__}")

    @classmethod
    def fromBase64(cls, data: Union[bytes, str]) -> "Data":
        """Build an instance from Base-64 encoded text or bytes."""
        decoded = b64decode(data)
        return cls(decoded)

    def asBase64(self, maxlinelength: int = 76, indent_level: int = 1) -> bytes:
        """Return the payload Base-64 encoded via ``_encode_base64``."""
        return _encode_base64(
            self.data, maxlinelength=maxlinelength, indent_level=indent_level
        )

    def __eq__(self, other: Any) -> bool:
        """Compare payloads with another ``Data`` or with raw ``bytes``."""
        if isinstance(other, self.__class__):
            other = other.data
        elif not isinstance(other, bytes):
            # Let Python try the reflected comparison / default behaviour.
            return NotImplemented
        return self.data == other

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.data!r})"
126
127
128def _encode_base64(
129    data: bytes, maxlinelength: Optional[int] = 76, indent_level: int = 1
130) -> bytes:
131    data = b64encode(data)
132    if data and maxlinelength:
133        # split into multiple lines right-justified to 'maxlinelength' chars
134        indent = b"\n" + b"  " * indent_level
135        max_length = max(16, maxlinelength - len(indent))
136        chunks = []
137        for i in range(0, len(data), max_length):
138            chunks.append(indent)
139            chunks.append(data[i : i + max_length])
140        chunks.append(indent)
141        data = b"".join(chunks)
142    return data
143
144
# Union of the Python types this module accepts for serialization.
# Nested containers are typed with ``Any`` because mypy does not support
# recursive type aliases as of 0.782 (Pylance does):
# https://github.com/python/mypy/issues/731
# https://devblogs.microsoft.com/python/pylance-introduces-five-new-features-that-enable-type-magic-for-python-developers/#1-support-for-recursive-type-aliases
PlistEncodable = Union[
    bool, bytes, Data, datetime, float, int, Mapping[str, Any], Sequence[Any], str
]
159
160
class PlistTarget:
    """ElementTree "target" object that turns plist XML events into Python
    objects; pass an instance to an XMLParser to build property lists.
    It is modelled on the CPython plistlib module's _PlistParser class,
    but does not use the expat parser.

    >>> from fontTools.misc import etree
    >>> parser = etree.XMLParser(target=PlistTarget())
    >>> result = etree.XML(
    ...     "<dict>"
    ...     "    <key>something</key>"
    ...     "    <string>blah</string>"
    ...     "</dict>",
    ...     parser=parser)
    >>> result == {"something": "blah"}
    True

    Links:
    https://github.com/python/cpython/blob/master/Lib/plistlib.py
    http://lxml.de/parsing.html#the-target-parser-interface
    """

    def __init__(
        self,
        use_builtin_types: Optional[bool] = None,
        dict_type: Type[MutableMapping[str, Any]] = dict,
    ) -> None:
        """Set up an empty parse state.

        Args:
            use_builtin_types: ``None`` selects the module default
                ``USE_BUILTIN_TYPES``; an explicit ``False`` is honoured
                but triggers a ``DeprecationWarning``.
            dict_type: Callable used to create a container for every
                ``<dict>`` element.
        """
        # Containers (dicts/lists) that are currently open, innermost last.
        self.stack: List[PlistEncodable] = []
        # Key read from the last <key>, waiting for its value.
        self.current_key: Optional[str] = None
        # First object added while no container was open.
        self.root: Optional[PlistEncodable] = None
        if use_builtin_types is False:
            warnings.warn(
                "Setting use_builtin_types to False is deprecated and will be "
                "removed soon.",
                DeprecationWarning,
            )
        self._use_builtin_types = (
            USE_BUILTIN_TYPES if use_builtin_types is None else use_builtin_types
        )
        self._dict_type = dict_type

    def start(self, tag: str, attrib: Mapping[str, str]) -> None:
        """Handle an opening tag: reset the text buffer, then dispatch."""
        self._data: List[str] = []
        on_start = _TARGET_START_HANDLERS.get(tag)
        if on_start is None:
            # Tags without a start handler need no action here.
            return
        on_start(self)

    def end(self, tag: str) -> None:
        """Handle a closing tag by dispatching to its end handler, if any."""
        on_end = _TARGET_END_HANDLERS.get(tag)
        if on_end is None:
            return
        on_end(self)

    def data(self, data: str) -> None:
        """Collect a piece of character data for the current element."""
        self._data.append(data)

    def close(self) -> PlistEncodable:
        """Return the root object; raise ``ValueError`` if none was set."""
        root = self.root
        if root is None:
            raise ValueError("No root set.")
        return root

    # helpers

    def add_object(self, value: PlistEncodable) -> None:
        """Attach ``value`` to the innermost open container, or make it the
        root when no container is open.

        With a pending key the innermost container must be a mutable
        mapping; without one it must be a list. Anything else raises
        ``ValueError``.
        """
        key = self.current_key
        if key is None and not self.stack:
            # this is the root object
            self.root = value
            return
        container = self.stack[-1]
        if key is not None:
            if not isinstance(container, collections.abc.MutableMapping):
                raise ValueError("unexpected element: %r" % container)
            container[key] = value
            self.current_key = None
        else:
            if not isinstance(container, list):
                raise ValueError("unexpected element: %r" % container)
            container.append(value)

    def get_data(self) -> str:
        """Return the buffered character data as one string and clear it."""
        text, self._data = "".join(self._data), []
        return text
244
245
246# event handlers
247
248
def start_dict(self: PlistTarget) -> None:
    """Open a ``<dict>``: create the container, attach it to its parent
    (or make it the root) and push it as the innermost open container."""
    new_dict = self._dict_type()
    self.add_object(new_dict)
    self.stack.append(new_dict)
253
254
def end_dict(self: PlistTarget) -> None:
    """Close the innermost ``<dict>``.

    Raises:
        ValueError: If a key is still waiting for its value.
    """
    # Compare against None rather than truthiness: an empty string is a
    # legitimate pending key and must be reported just like any other.
    if self.current_key is not None:
        raise ValueError("missing value for key '%s'" % self.current_key)
    self.stack.pop()
259
260
def end_key(self: PlistTarget) -> None:
    """Record the text of a ``<key>`` element as the pending dictionary key.

    Raises:
        ValueError: If another key is already pending, or if the innermost
            open container is missing or is not a mapping.
    """
    if (
        # ``is not None`` so that a pending empty-string key still counts.
        self.current_key is not None
        # A <key> outside of any container is malformed input; report it as
        # such instead of failing with IndexError on the lookup below.
        or not self.stack
        or not isinstance(self.stack[-1], collections.abc.Mapping)
    ):
        raise ValueError("unexpected key")
    self.current_key = self.get_data()
265
266
def start_array(self: PlistTarget) -> None:
    """Open an ``<array>``: create a list, attach it to its parent (or make
    it the root) and push it as the innermost open container."""
    new_list: List[PlistEncodable] = []
    self.add_object(new_list)
    self.stack.append(new_list)
271
272
def end_array(self: PlistTarget) -> None:
    """Close the innermost ``<array>`` by dropping it from the stack."""
    open_containers = self.stack
    open_containers.pop()
275
276
def end_true(self: PlistTarget) -> None:
    """Add ``True`` for a ``<true/>`` element."""
    value = True
    self.add_object(value)
279
280
def end_false(self: PlistTarget) -> None:
    """Add ``False`` for a ``<false/>`` element."""
    value = False
    self.add_object(value)
283
284
def end_integer(self: PlistTarget) -> None:
    """Convert the buffered text of an ``<integer>`` with ``int`` and add it."""
    text = self.get_data()
    self.add_object(int(text))
287
288
def end_real(self: PlistTarget) -> None:
    """Convert the buffered text of a ``<real>`` with ``float`` and add it."""
    text = self.get_data()
    self.add_object(float(text))
291
292
def end_string(self: PlistTarget) -> None:
    """Add the buffered text of a ``<string>`` element unchanged."""
    text = self.get_data()
    self.add_object(text)
295
296
def end_data(self: PlistTarget) -> None:
    """Decode the Base-64 text of a ``<data>`` element and add the result.

    The payload is added as plain ``bytes`` when the target uses built-in
    types, otherwise wrapped in a ``Data`` instance.
    """
    decode = b64decode if self._use_builtin_types else Data.fromBase64
    self.add_object(decode(self.get_data()))
302
303
def end_date(self: PlistTarget) -> None:
    """Parse the buffered text of a ``<date>`` element and add the result."""
    text = self.get_data()
    self.add_object(_date_from_string(text))
306
307
# Handlers run by ``PlistTarget.start`` for opening tags, keyed by tag name.
# Only container elements need work when they open.
_TARGET_START_HANDLERS: Dict[str, Callable[[PlistTarget], None]] = dict(
    [
        ("dict", start_dict),
        ("array", start_array),
    ]
)
312
# Handlers run by ``PlistTarget.end`` for closing tags, keyed by tag name.
_TARGET_END_HANDLERS: Dict[str, Callable[[PlistTarget], None]] = dict(
    [
        # containers
        ("dict", end_dict),
        ("array", end_array),
        # dictionary keys
        ("key", end_key),
        # scalar values
        ("true", end_true),
        ("false", end_false),
        ("integer", end_integer),
        ("real", end_real),
        ("string", end_string),
        ("data", end_data),
        ("date", end_date),
    ]
)
325
326
327# functions to build element tree from plist data
328
329
330def _string_element(value: str, ctx: SimpleNamespace) -> etree.Element:
331    el = etree.Element("string")
332    el.text = value
333    return el
334
335
336def _bool_element(value: bool, ctx: SimpleNamespace) -> etree.Element:
337    if value:
338        return etree.Element("true")
339    return etree.Element("false")
340
341
342def _integer_element(value: int, ctx: SimpleNamespace) -> etree.Element:
343    if -1 << 63 <= value < 1 << 64:
344        el = etree.Element("integer")
345        el.text = "%d" % value
346        return el
347    raise OverflowError(value)
348
349
350def _real_element(value: float, ctx: SimpleNamespace) -> etree.Element:
351    el = etree.Element("real")
352    el.text = repr(value)
353    return el
354
355
356def _dict_element(d: Mapping[str, PlistEncodable], ctx: SimpleNamespace) -> etree.Element:
357    el = etree.Element("dict")
358    items = d.items()
359    if ctx.sort_keys:
360        items = sorted(items)  # type: ignore
361    ctx.indent_level += 1
362    for key, value in items:
363        if not isinstance(key, str):
364            if ctx.skipkeys:
365                continue
366            raise TypeError("keys must be strings")
367        k = etree.SubElement(el, "key")
368        k.text = tostr(key, "utf-8")
369        el.append(_make_element(value, ctx))
370    ctx.indent_level -= 1
371    return el
372
373
374def _array_element(array: Sequence[PlistEncodable], ctx: SimpleNamespace) -> etree.Element:
375    el = etree.Element("array")
376    if len(array) == 0:
377        return el
378    ctx.indent_level += 1
379    for value in array:
380        el.append(_make_element(value, ctx))
381    ctx.indent_level -= 1
382    return el
383
384
def _date_element(date: datetime, ctx: SimpleNamespace) -> etree.Element:
    """Return a ``<date>`` element with ``date`` formatted by
    ``_date_to_string``. ``ctx`` is not used."""
    node = etree.Element("date")
    node.text = _date_to_string(date)
    return node
389
390
def _data_element(data: bytes, ctx: SimpleNamespace) -> etree.Element:
    """Return a ``<data>`` element holding ``data`` Base-64 encoded.

    Lines are wrapped at 76 characters only when ``ctx.pretty_print`` is
    set; ``ctx.indent_level`` controls the indentation of wrapped lines.
    """
    line_limit = 76 if ctx.pretty_print else None
    node = etree.Element("data")
    # NOTE: mypy is confused about whether el.text should be str or bytes.
    node.text = _encode_base64(  # type: ignore
        data, maxlinelength=line_limit, indent_level=ctx.indent_level
    )
    return node
400
401
402def _string_or_data_element(raw_bytes: bytes, ctx: SimpleNamespace) -> etree.Element:
403    if ctx.use_builtin_types:
404        return _data_element(raw_bytes, ctx)
405    else:
406        try:
407            string = raw_bytes.decode(encoding="ascii", errors="strict")
408        except UnicodeDecodeError:
409            raise ValueError(
410                "invalid non-ASCII bytes; use unicode string instead: %r" % raw_bytes
411            )
412        return _string_element(string, ctx)
413
414
# The annotations below are probably not entirely correct: the fallback
# should really take `Any` and return `NoReturn`. At the time of writing
# neither mypy nor Pyright handles singledispatch properly; both apply the
# signature of the base function to every registered implementation. Being
# slightly dishonest here keeps type-checking working and gives usable
# typing information for the optimistic case.
@singledispatch
def _make_element(value: PlistEncodable, ctx: SimpleNamespace) -> etree.Element:
    """Build the XML element for ``value``, dispatching on its type.

    This base implementation is only reached for types without a registered
    builder and always raises ``TypeError``.
    """
    raise TypeError("unsupported type: %s" % type(value))


# Map every supported Python type to its element builder.
_make_element.register(str, _string_element)
_make_element.register(bool, _bool_element)
_make_element.register(Integral, _integer_element)
_make_element.register(float, _real_element)
_make_element.register(collections.abc.Mapping, _dict_element)
_make_element.register(list, _array_element)
_make_element.register(tuple, _array_element)
_make_element.register(datetime, _date_element)
_make_element.register(bytes, _string_or_data_element)
_make_element.register(bytearray, _data_element)
# Data wrappers are always written as <data>, using their raw payload.
_make_element.register(Data, lambda wrapped, ctx: _data_element(wrapped.data, ctx))
436
437
# Public functions to create an element tree from plist-compatible Python
# data structures and vice versa, for use when (de)serializing GLIF XML.
440
441
def totree(
    value: PlistEncodable,
    sort_keys: bool = True,
    skipkeys: bool = False,
    use_builtin_types: Optional[bool] = None,
    pretty_print: bool = True,
    indent_level: int = 1,
) -> etree.Element:
    """Build an XML element tree from a plist-compatible Python value.

    Args:
        value: The value to serialize.
        sort_keys: Whether dictionary items are written in sorted order.
        skipkeys: Whether non-string dictionary keys are silently
            skipped instead of raising.
        use_builtin_types: If true, byte strings are Base-64 encoded
            and wrapped in a ``data`` tag; if false, they are stored as
            ASCII ``string`` elements, and an exception is raised when
            they cannot be decoded as ASCII. ``None`` selects the module
            default ``USE_BUILTIN_TYPES``. Deprecated.
        pretty_print: Whether Base-64 data is wrapped onto indented
            lines.
        indent_level: Indentation level the serialization starts at.

    Returns: an ``etree`` ``Element`` object.

    Raises:
        ``TypeError``
            if a non-string dictionary key is met and ``skipkeys`` is
            false, or if a value has an unsupported type.
        ``ValueError``
            if non-ASCII binary data is present
            and `use_builtin_types` is false.
    """
    options = SimpleNamespace(
        sort_keys=sort_keys,
        skipkeys=skipkeys,
        use_builtin_types=(
            USE_BUILTIN_TYPES if use_builtin_types is None else use_builtin_types
        ),
        pretty_print=pretty_print,
        indent_level=indent_level,
    )
    return _make_element(value, options)
487
488
def fromtree(
    tree: etree.Element,
    use_builtin_types: Optional[bool] = None,
    dict_type: Type[MutableMapping[str, Any]] = dict,
) -> Any:
    """Convert an already-parsed XML tree into a plist structure.

    Args:
        tree: An ``etree`` ``Element``.
        use_builtin_types: If True, binary data is deserialized to
            bytes strings. If False, it is wrapped in :py:class:`Data`
            objects. Defaults to True if not provided. Deprecated.
        dict_type: What type to use for dictionaries.

    Returns: An object (usually a dictionary).
    """
    target = PlistTarget(use_builtin_types=use_builtin_types, dict_type=dict_type)
    # Replay the tree as the start/data/end events a parser would emit.
    for event, node in etree.iterwalk(tree, events=("start", "end")):
        if event == "start":
            target.start(node.tag, node.attrib)
            continue
        if event != "end":
            continue
        if len(node) == 0:
            # Leaf element: hand over its text (always a str, never None).
            target.data(node.text or "")
        target.end(node.tag)
    return target.close()
516
517
518# python3 plistlib API
519
520
def load(
    fp: IO[bytes],
    use_builtin_types: Optional[bool] = None,
    dict_type: Type[MutableMapping[str, Any]] = dict,
) -> Any:
    """Parse a plist from an open file object.

    Args:
        fp: An opened file; it must provide a ``read`` attribute.
        use_builtin_types: If True, binary data is deserialized to
            bytes strings. If False, it is wrapped in :py:class:`Data`
            objects. Defaults to True if not provided. Deprecated.
        dict_type: What type to use for dictionaries.

    Returns:
        An object (usually a dictionary) representing the top level of
        the plist file.

    Raises:
        AttributeError: If ``fp`` has no ``read`` attribute.
    """
    if not hasattr(fp, "read"):
        raise AttributeError(f"'{type(fp).__name__}' object has no attribute 'read'")
    parser = etree.XMLParser(
        target=PlistTarget(use_builtin_types=use_builtin_types, dict_type=dict_type)
    )
    parsed = etree.parse(fp, parser=parser)
    # lxml returns the target object directly, while ElementTree wraps
    # it as the root of an ElementTree object
    try:
        plist = parsed.getroot()
    except AttributeError:
        plist = parsed
    return plist
551
552
def loads(
    value: bytes,
    use_builtin_types: Optional[bool] = None,
    dict_type: Type[MutableMapping[str, Any]] = dict,
) -> Any:
    """Parse a plist held in a bytes string.

    The bytes are wrapped in an in-memory file and handed to :py:func:`load`.

    Args:
        value: A bytes string containing a plist.
        use_builtin_types: If True, binary data is deserialized to
            bytes strings. If False, it is wrapped in :py:class:`Data`
            objects. Defaults to True if not provided. Deprecated.
        dict_type: What type to use for dictionaries.

    Returns:
        An object (usually a dictionary) representing the top level of
        the plist file.
    """
    return load(
        BytesIO(value), use_builtin_types=use_builtin_types, dict_type=dict_type
    )
574
575
def dump(
    value: PlistEncodable,
    fp: IO[bytes],
    sort_keys: bool = True,
    skipkeys: bool = False,
    use_builtin_types: Optional[bool] = None,
    pretty_print: bool = True,
) -> None:
    """Serialize a Python object as a plist and write it to a file.

    Args:
        value: An object to write.
        fp: A file opened for writing bytes; it must provide a ``write``
            attribute.
        sort_keys (bool): Whether keys of dictionaries should be sorted.
        skipkeys (bool): Whether to silently skip non-string dictionary
            keys.
        use_builtin_types (bool): If true, byte strings will be
            encoded in Base-64 and wrapped in a ``data`` tag; if
            false, they will be either stored as ASCII strings or an
            exception raised if they cannot be represented. Defaults
            to ``True`` if not present. Deprecated.
        pretty_print (bool): Whether to indent the output.

    Raises:
        ``AttributeError``
            if ``fp`` has no ``write`` attribute.
        ``TypeError``
            if non-string dictionary keys are serialized
            and ``skipkeys`` is false.
        ``ValueError``
            if non-representable binary data is present
            and `use_builtin_types` is false.
    """
    if not hasattr(fp, "write"):
        raise AttributeError(f"'{type(fp).__name__}' object has no attribute 'write'")
    plist = etree.Element("plist", version="1.0")
    plist.append(
        totree(
            value,
            sort_keys=sort_keys,
            skipkeys=skipkeys,
            use_builtin_types=use_builtin_types,
            pretty_print=pretty_print,
        )
    )
    document = etree.ElementTree(plist)
    # We write the doctype ourselves instead of using the 'doctype' argument
    # of the 'write' method, because lxml would force a '\n' after it even
    # when pretty_print is False.
    if pretty_print:
        header = XML_DECLARATION + b"\n" + PLIST_DOCTYPE + b"\n"
    else:
        header = XML_DECLARATION + PLIST_DOCTYPE
    fp.write(header)
    document.write(  # type: ignore
        fp,
        encoding="utf-8",
        pretty_print=pretty_print,
        xml_declaration=False,
    )
634
635
def dumps(
    value: PlistEncodable,
    sort_keys: bool = True,
    skipkeys: bool = False,
    use_builtin_types: Optional[bool] = None,
    pretty_print: bool = True,
) -> bytes:
    """Serialize a Python object to plist-formatted bytes.

    The object is written with :py:func:`dump` into an in-memory buffer
    whose content is returned.

    Args:
        value: An object to write.
        sort_keys (bool): Whether keys of dictionaries should be sorted.
        skipkeys (bool): Whether to silently skip non-string dictionary
            keys.
        use_builtin_types (bool): If true, byte strings will be
            encoded in Base-64 and wrapped in a ``data`` tag; if
            false, they will be either stored as strings or an
            exception raised if they cannot be represented. Defaults
            to ``True`` if not present. Deprecated.
        pretty_print (bool): Whether to indent the output.

    Returns:
        bytes: A plist representation of the Python object.

    Raises:
        ``TypeError``
            if non-string dictionary keys are serialized
            and ``skipkeys`` is false.
        ``ValueError``
            if non-representable binary data is present
            and `use_builtin_types` is false.
    """
    buffer = BytesIO()
    options = dict(
        sort_keys=sort_keys,
        skipkeys=skipkeys,
        use_builtin_types=use_builtin_types,
        pretty_print=pretty_print,
    )
    dump(value, buffer, **options)
    return buffer.getvalue()
678