1#!/usr/bin/env python
2
3# Copyright (C) 2018 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the 'License');
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an 'AS IS' BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""
18Enforces common Android string best-practices.  It ignores lint messages from
19a previous strings file, if provided.
20
21Usage: stringslint.py strings.xml
22Usage: stringslint.py strings.xml old_strings.xml
23
24In general:
25* Errors signal issues that must be fixed before submitting, and are only
26  used when there are no false-positives.
27* Warnings signal issues that might need to be fixed, but need manual
28  inspection due to risk of false-positives.
29* Info signal issues that should be fixed to match best-practices, such
30  as providing comments to aid translation.
31"""
32
33import re, sys, codecs
34import lxml.etree as ET
35
# Python 2 only: make UTF-8 the implicit str <-> unicode conversion codec so
# that messages mixing byte strings and unicode text can be formatted and
# printed.  reload(sys) is needed because the interpreter removes
# sys.setdefaultencoding from the module after start-up.  Neither name exists
# on Python 3 (where text is already unicode), so the hack is skipped there.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf8')

# ANSI color indexes; format() appends them to the "3", "4" and "10" prefixes
# to build foreground, background and bright-background codes.
BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8)
40
def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False):
    """Build an ANSI escape sequence for the requested terminal attributes.

    Codes manually derived from
    http://en.wikipedia.org/wiki/ANSI_escape_code#Codes

    Args:
        fg: Foreground color index, or None to leave it out.
        bg: Background color index, or None to leave it out.
        bright: Use the "10x" background code instead of "4x"; only
            consulted when ``bg`` is given.
        bold: Emit the bold code; takes precedence over ``dim``.
        dim: Emit the dim code when ``bold`` is not set.
        reset: Emit only the reset code and ignore every other argument.

    Returns:
        The escape sequence as a string.
    """
    if reset:
        return "\033[0m"

    sgr = []
    if fg is not None:
        sgr.append("3%d" % fg)
    if bg is not None:
        sgr.append(("10%d" if bright else "4%d") % bg)

    # An intensity code is always emitted; "22" means neither bold nor dim.
    if bold:
        intensity = "1"
    elif dim:
        intensity = "2"
    else:
        intensity = "22"
    sgr.append(intensity)

    return "\033[%sm" % ";".join(sgr)
54
55warnings = None
56
def warn(tag, msg, actual, expected, color=YELLOW):
    """Record a lint finding for ``tag`` in the module-level ``warnings`` dict.

    The entry is keyed by the tag's ``name`` attribute plus the hash of
    ``msg``, so reporting the same message twice for one string keeps only
    the most recent entry.

    Args:
        tag: Element being reported; its ``name`` attribute and
            ``sourceline`` are read.
        msg: Human-readable description of the issue.
        actual: Offending text to show on an "Actual:" line, or None.
        expected: Sample of the preferred form to show on an "Example:"
            line, or None.
        color: Color index used for the highlighted heading.
    """
    global warnings
    name = tag.attrib["name"]
    entry_key = "%s:%d" % (name, hash(msg))

    pieces = [
        "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True),
                                   tag.sourceline,
                                   name,
                                   format(reset=True),
                                   msg),
    ]
    if actual is not None:
        pieces.append("\n\tActual: %s%s%s" % (format(dim=True),
                                              actual,
                                              format(reset=True)))
    if expected is not None:
        pieces.append("\n\tExample: %s%s%s" % (format(dim=True),
                                               expected,
                                               format(reset=True)))

    warnings[entry_key] = "".join(pieces)
72
73
def error(tag, msg, actual, expected):
    """Record a finding through warn() with a red heading."""
    warn(tag, msg, actual, expected, color=RED)
76
def info(tag, msg, actual, expected):
    """Record a finding through warn() with a cyan heading."""
    warn(tag, msg, actual, expected, color=CYAN)
79
# Escaping logic borrowed from https://stackoverflow.com/a/24519338
# The hex escapes require real hex digits so that ordinary text which merely
# starts like an escape (for example "\username") is not treated as one.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U[0-9A-Fa-f]{8}   # 8-digit hex escapes
    | \\u[0-9A-Fa-f]{4}   # 4-digit hex escapes
    | \\x[0-9A-Fa-f]{2}   # 2-digit hex escapes
    | \\[0-7]{1,3}        # Octal escapes
    | \\N\{[^}]+\}        # Unicode characters by name
    | \\[\\'"abfnrtv]     # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)

def decode_escapes(s):
    """Approximate how a raw string resource value expands when displayed.

    Steps, in order: collapse each newline plus following whitespace into a
    single space, decode backslash escapes, replace format arguments
    (``%s``, ``%1$d``) and ``^1``-style placeholders with ``____``, turn
    ``<br>``/``<br/>`` into newlines, and drop other simple lowercase tags.

    Args:
        s: Raw text taken from the XML.

    Returns:
        The expanded text.  Escape sequences that cannot be decoded (for
        example an unknown ``\\N{...}`` name) are left unchanged instead of
        raising.
    """
    def decode_match(match):
        escape = match.group(0)
        try:
            return codecs.decode(escape, 'unicode-escape')
        except UnicodeError:
            # Malformed or out-of-range escape: keep the source text.
            return escape

    s = re.sub(r"\n\s*", " ", s)
    s = ESCAPE_SEQUENCE_RE.sub(decode_match, s)
    s = re.sub(r"%(\d+\$)?[a-z]", "____", s)
    s = re.sub(r"\^\d+", "____", s)
    s = re.sub(r"<br/?>", "\n", s)
    s = re.sub(r"</?[a-z]+>", "", s)
    return s
101
def sample_iter(tag):
    """Yield the pieces of sample text for ``tag`` and its descendants.

    An xliff ``g`` element that carries an ``example`` attribute contributes
    that example verbatim; any other node contributes its own text passed
    through decode_escapes().  Children are then visited recursively in
    document order, each followed by its decoded tail text.
    """
    # Comments are tested first so that re.match is never handed their tag.
    is_xliff_g = (not isinstance(tag, ET._Comment)
                  and re.match("{.*xliff.*}g", tag.tag))
    if is_xliff_g and "example" in tag.attrib:
        yield tag.attrib["example"]
    elif tag.text:
        yield decode_escapes(tag.text)

    for node in tag:
        for piece in sample_iter(node):
            yield piece
        trailing = node.tail
        if trailing:
            yield decode_escapes(trailing)
112
def lint(path):
    """Lint the Android string resources stored in ``path``.

    Walks the direct children of the XML root, pairs every ``<string>``
    element with the comment node seen since the previous ``<string>`` (if
    any), and records findings through info()/warn()/error().

    Args:
        path: Path of the strings XML file to check.

    Returns:
        The module-level ``warnings`` dict, which is replaced by a fresh dict
        on every call.  It maps "<string name>:<hash of message>" to the
        formatted message and is empty when the file contains only
        whitespace.
    """
    global warnings
    warnings = {}

    # Read bytes and let the XML parser apply the encoding declared in the
    # file; lxml refuses already-decoded text that still carries an encoding
    # declaration.
    with open(path, "rb") as f:
        raw = f.read()
    if not raw.strip():
        return warnings
    root = ET.fromstring(raw)

    xliff_example = "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>"
    misspelled_attributes = [
        ("translateable", "translatable"),
    ]

    last_comment = None
    for child in root:
        # TODO: handle plurals
        if isinstance(child, ET._Comment):
            last_comment = child
            continue
        if child.tag != "string":
            continue

        # We always consume comment
        comment = last_comment
        last_comment = None

        # Prepare string for analysis
        text = "".join(child.itertext())
        sample = "".join(sample_iter(child)).strip().strip("'\"")

        # Validate comment
        if comment is None:
            info(child, "Missing string comment to aid translation",
                 None, None)
            continue
        if "do not translate" in comment.text.lower():
            continue
        if "translatable" in child.attrib and child.attrib["translatable"].lower() == "false":
            continue

        for misspelling, expected in misspelled_attributes:
            if misspelling in child.attrib:
                error(child, "Misspelled <string> attribute.", misspelling, expected)

        limit = re.search(r"CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text)
        if limit is None:
            info(child, "Missing CHAR LIMIT to aid translation",
                 repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->")
        elif re.match(r"\d+", limit.group(1)):
            max_chars = int(limit.group(1))
            if len(sample) > max_chars:
                warn(child, "Expanded string length is larger than CHAR LIMIT",
                     sample, None)

        # Look for common mistakes/substitutions.  The examples are unicode
        # literals so that the \uXXXX escapes become real glyphs on Python 2
        # as well as Python 3.
        if "'" in text:
            error(child, "Turned quotation mark glyphs are more polished",
                  text, u"This doesn\u2019t need to \u2018happen\u2019 today")
        # A value wrapped entirely in double quotes is accepted; a straight
        # quote anywhere else is reported.
        if '"' in text and not (text.startswith('"') and text.endswith('"')):
            error(child, "Turned quotation mark glyphs are more polished",
                  text, u"This needs to \u201chappen\u201d today")
        if "..." in text:
            error(child, "Ellipsis glyph is more polished",
                  text, u"Loading\u2026")
        if "wi-fi" in text.lower():
            error(child, "Non-breaking glyph is more polished",
                  text, u"Wi\u2011Fi")
        if "wifi" in text.lower():
            error(child, "Using non-standard spelling",
                  text, u"Wi\u2011Fi")
        if re.search(r"\d-\d", text):
            warn(child, "Ranges should use en dash glyph",
                 text, u"You will find this material in chapters 8\u201312")
        if "--" in text:
            warn(child, "Phrases should use em dash glyph",
                 text, u"Upon discovering errors\u2014all 124 of them\u2014they recalled.")
        if ".  " in text:
            warn(child, "Only use single space between sentences",
                 text, "First idea. Second idea.")
        if re.match(r"^[A-Z\s]{5,}$", text):
            warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym",
                 text, "Refresh data")
        if " phone " in text and "product" not in child.attrib:
            warn(child, "Strings mentioning phones should have variants for tablets",
                 text, None)

        # When more than one substitution, require indexes
        if len(re.findall("%[^%]", text)) > 1:
            if len(re.findall(r"%[^\d]", text)) > 0:
                error(child, "Substitutions must be indexed",
                      text, "Add %1$s to %2$s")

        # Require xliff substitutions
        for gc in child.iter():
            badsub = False
            if gc.tail and re.search("%[^%]", gc.tail):
                badsub = True
            if isinstance(gc, ET._Comment):
                # Same guard as sample_iter(): a comment's tag is not a
                # string that re.match can take, and its text is not part of
                # the resource value.  Its tail was already checked above.
                pass
            elif re.match("{.*xliff.*}g", gc.tag):
                if "id" not in gc.attrib:
                    error(child, "Substitutions must define id attribute",
                          None, xliff_example)
                if "example" not in gc.attrib:
                    error(child, "Substitutions must define example attribute",
                          None, xliff_example)
            elif gc.text and re.search("%[^%]", gc.text):
                badsub = True
            if badsub:
                error(child, "Substitutions must be inside xliff tags",
                      text, xliff_example)

    return warnings
221
# Command-line entry point: lint the new strings file, discard every finding
# that the optional previous file already produced, print what remains and
# exit with status 1 if anything is left.
if len(sys.argv) < 2:
    # Without a file to lint there is nothing to do; explain instead of
    # failing with an IndexError traceback.
    sys.stderr.write("Usage: stringslint.py strings.xml [old_strings.xml]\n")
    sys.exit(2)

if len(sys.argv) > 2:
    before = lint(sys.argv[2])
else:
    before = {}
after = lint(sys.argv[1])

for key in before:
    after.pop(key, None)

if after:
    for key in sorted(after):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(after[key])
        print("")
    sys.exit(1)
237