1# Copyright (C) 2020 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""A library containing functions for diffing XML elements."""
15import textwrap
16from typing import Any, Callable, Dict, Set
17import xml.etree.ElementTree as ET
18import dataclasses
19
# Short alias so annotations in this module can simply say `Element`.
Element = ET.Element

# One indentation level (two spaces) used when rendering change reports.
_INDENT = '  '
23
24
@dataclasses.dataclass
class Change:
  """A single value transition, rendered as `<from> -> <to>`.

  Attributes:
    value_from: the value before the change.
    value_to: the value after the change.
  """
  value_from: str
  value_to: str

  def __repr__(self):
    # Compact arrow form instead of the default dataclass repr.
    return '{} -> {}'.format(self.value_from, self.value_to)
32
33
@dataclasses.dataclass
class ChangeMap:
  """Changes grouped into added, removed and modified entries.

  An instance is falsy when all three groups are empty.

  Attributes:
    added: A dictionary of string identifiers to the added string.
    removed: A dictionary of string identifiers to the removed string.
    modified: A dictionary of string identifiers to the changed object.
  """
  added: Dict[str, str] = dataclasses.field(default_factory=dict)
  removed: Dict[str, str] = dataclasses.field(default_factory=dict)
  modified: Dict[str, Any] = dataclasses.field(default_factory=dict)

  def __repr__(self):
    """Render the non-empty groups as an indented, multi-line report."""
    pieces = []
    # Added and removed entries are listed by value only, one level deep.
    for heading, entries in (('Added:\n', self.added),
                             ('Removed:\n', self.removed)):
      if not entries:
        continue
      pieces.append(heading)
      pieces.extend(
          textwrap.indent(str(text) + '\n', _INDENT)
          for text in entries.values())
    if self.modified:
      pieces.append('Modified:\n')
      nested = _INDENT * 2
      # Modified entries show their key, then the detail one level deeper.
      for key, detail in self.modified.items():
        pieces.append(textwrap.indent(key + ':\n', _INDENT))
        pieces.append(textwrap.indent(str(detail) + '\n', nested))
    return ''.join(pieces)

  def __bool__(self):
    """Return True when at least one group holds an entry."""
    return any((self.added, self.removed, self.modified))
66
67
def element_string(e: Element) -> str:
  """Serialize an XML element to text without surrounding whitespace.

  Arguments:
    e: the xml element to serialize.

  Returns:
    The output of ET.tostring() decoded as UTF-8, with leading and
    trailing whitespace stripped.
  """
  serialized = ET.tostring(e)
  text = serialized.decode(encoding='UTF-8')
  return text.strip()
70
71
def attribute_changes(e1: Element, e2: Element,
                      ignored_attrs: Set[str]) -> ChangeMap:
  """Get the changes in attributes between two XML elements.

  An attribute is "added" when only e2 has it, "removed" when only e1 has
  it, and "modified" when both have it with different values. An attribute
  that is present with an empty value counts as present, so a change
  between '' and a non-empty value is a modification.

  Arguments:
    e1: the first xml element.
    e2: the second xml element.
    ignored_attrs: a set of attribute names to ignore changes.

  Returns:
    A ChangeMap of attribute changes. Keyed by attribute name. Entries are
    inserted in sorted attribute-name order.
  """
  changes = ChangeMap()
  # Sort the union of names: iterating a set directly would make the
  # insertion order of the result (and its printed form) vary between runs.
  for attr in sorted(set(e1.keys()) | set(e2.keys())):
    if attr in ignored_attrs:
      continue
    a1 = e1.get(attr)
    a2 = e2.get(attr)
    if a1 == a2:
      continue
    # Compare against None rather than using truthiness: get() yields None
    # for a missing attribute, whereas '' is a present-but-empty value.
    if a1 is None:
      # a2 cannot be None here because a1 != a2; `or ''` only narrows the
      # Optional type for static checkers.
      changes.added[attr] = a2 or ''
    elif a2 is None:
      changes.removed[attr] = a1
    else:
      changes.modified[attr] = Change(value_from=a1, value_to=a2)
  return changes
100
101
def compare_subelements(
    tag: str,
    p1: Element,
    p2: Element,
    ignored_attrs: Set[str],
    key_fn: Callable[[Element], str],
    diff_fn: Callable[[Element, Element, Set[str]], Any]) -> ChangeMap:
  """Diff the `tag` children of two parent elements, matched by key.

  Children of p2 whose key has no unmatched counterpart in p1 are reported
  as added, children of p1 never matched by p2 are reported as removed,
  and matched pairs are reported as modified when diff_fn returns a truthy
  result.

  Arguments:
    tag: tag name for children element.
    p1: the base parent xml element.
    p2: the parent xml element to compare.
    ignored_attrs: a set of attribute names to ignore changes.
    key_fn: Function that takes a subelement and returns a key.
    diff_fn: Function that takes two subelements and a set of ignored
      attributes, returns the differences.

  Returns:
    A ChangeMap object of the changes.
  """
  result = ChangeMap()
  # Index the base children by key; entries are popped as they get matched.
  unmatched = {key_fn(child): child for child in p1.findall(tag)}

  for candidate in p2.findall(tag):
    name = key_fn(candidate)
    base = unmatched.pop(name, None)
    if base is None:
      result.added[name] = element_string(candidate)
      continue
    delta = diff_fn(base, candidate, ignored_attrs)
    if delta:
      result.modified[name] = delta

  # Whatever is still indexed was never matched by a child of p2.
  result.removed.update(
      (name, element_string(base)) for name, base in unmatched.items())

  return result
142