1#!/usr/bin/env python 2 3# Copyright (C) 2018 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the 'License'); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an 'AS IS' BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16 17""" 18Enforces common Android string best-practices. It ignores lint messages from 19a previous strings file, if provided. 20 21Usage: stringslint.py strings.xml 22Usage: stringslint.py strings.xml old_strings.xml 23 24In general: 25* Errors signal issues that must be fixed before submitting, and are only 26 used when there are no false-positives. 27* Warnings signal issues that might need to be fixed, but need manual 28 inspection due to risk of false-positives. 29* Info signal issues that should be fixed to match best-practices, such 30 as providing comments to aid translation. 31""" 32 33import re, sys, codecs 34import lxml.etree as ET 35 36reload(sys) 37sys.setdefaultencoding('utf8') 38 39BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(8) 40 41def format(fg=None, bg=None, bright=False, bold=False, dim=False, reset=False): 42 # manually derived from http://en.wikipedia.org/wiki/ANSI_escape_code#Codes 43 codes = [] 44 if reset: codes.append("0") 45 else: 46 if not fg is None: codes.append("3%d" % (fg)) 47 if not bg is None: 48 if not bright: codes.append("4%d" % (bg)) 49 else: codes.append("10%d" % (bg)) 50 if bold: codes.append("1") 51 elif dim: codes.append("2") 52 else: codes.append("22") 53 return "\033[%sm" % (";".join(codes)) 54 55warnings = None 56 57def warn(tag, msg, actual, expected, color=YELLOW): 58 global warnings 59 key = "%s:%d" % (tag.attrib["name"], hash(msg)) 60 value = "%sLine %d: '%s':%s %s" % (format(fg=color, bold=True), 61 tag.sourceline, 62 tag.attrib["name"], 63 format(reset=True), 64 msg) 65 if not actual is None: value += "\n\tActual: %s%s%s" % (format(dim=True), 66 actual, 67 format(reset=True)) 68 if not expected is None: value += "\n\tExample: %s%s%s" % (format(dim=True), 69 expected, 70 format(reset=True)) 71 warnings[key] = value 72 73 74def error(tag, msg, actual, expected): 75 warn(tag, msg, actual, expected, RED) 76 77def info(tag, msg, actual, expected): 78 warn(tag, msg, actual, expected, CYAN) 79 80# Escaping logic borrowed from https://stackoverflow.com/a/24519338 81ESCAPE_SEQUENCE_RE = re.compile(r''' 82 ( \\U........ # 8-digit hex escapes 83 | \\u.... # 4-digit hex escapes 84 | \\x.. # 2-digit hex escapes 85 | \\[0-7]{1,3} # Octal escapes 86 | \\N\{[^}]+\} # Unicode characters by name 87 | \\[\\'"abfnrtv] # Single-character escapes 88 )''', re.UNICODE | re.VERBOSE) 89 90def decode_escapes(s): 91 def decode_match(match): 92 return codecs.decode(match.group(0), 'unicode-escape') 93 94 s = re.sub(r"\n\s*", " ", s) 95 s = ESCAPE_SEQUENCE_RE.sub(decode_match, s) 96 s = re.sub(r"%(\d+\$)?[a-z]", "____", s) 97 s = re.sub(r"\^\d+", "____", s) 98 s = re.sub(r"<br/?>", "\n", s) 99 s = re.sub(r"</?[a-z]+>", "", s) 100 return s 101 102def sample_iter(tag): 103 if not isinstance(tag, ET._Comment) and re.match("{.*xliff.*}g", tag.tag) and "example" in tag.attrib: 104 yield tag.attrib["example"] 105 elif tag.text: 106 yield decode_escapes(tag.text) 107 for e in tag: 108 for v in sample_iter(e): 109 yield v 110 if e.tail: 111 yield decode_escapes(e.tail) 112 113def lint(path): 114 global warnings 115 warnings = {} 116 117 with open(path) as f: 118 raw = f.read() 119 if len(raw.strip()) == 0: 120 return warnings 121 tree = ET.fromstring(raw) 122 root = tree #tree.getroot() 123 124 last_comment = None 125 for child in root: 126 # TODO: handle plurals 127 if isinstance(child, ET._Comment): 128 last_comment = child 129 elif child.tag == "string": 130 # We always consume comment 131 comment = last_comment 132 last_comment = None 133 134 # Prepare string for analysis 135 text = "".join(child.itertext()) 136 sample = "".join(sample_iter(child)).strip().strip("'\"") 137 138 # Validate comment 139 if comment is None: 140 info(child, "Missing string comment to aid translation", 141 None, None) 142 continue 143 if "do not translate" in comment.text.lower(): 144 continue 145 if "translatable" in child.attrib and child.attrib["translatable"].lower() == "false": 146 continue 147 148 misspelled_attributes = [ 149 ("translateable", "translatable"), 150 ] 151 for misspelling, expected in misspelled_attributes: 152 if misspelling in child.attrib: 153 error(child, "Misspelled <string> attribute.", misspelling, expected) 154 155 limit = re.search("CHAR[ _-]LIMIT=(\d+|NONE|none)", comment.text) 156 if limit is None: 157 info(child, "Missing CHAR LIMIT to aid translation", 158 repr(comment), "<!-- Description of string [CHAR LIMIT=32] -->") 159 elif re.match("\d+", limit.group(1)): 160 limit = int(limit.group(1)) 161 if len(sample) > limit: 162 warn(child, "Expanded string length is larger than CHAR LIMIT", 163 sample, None) 164 165 # Look for common mistakes/substitutions 166 if "'" in text: 167 error(child, "Turned quotation mark glyphs are more polished", 168 text, "This doesn\u2019t need to \u2018happen\u2019 today") 169 if '"' in text and not text.startswith('"') and text.endswith('"'): 170 error(child, "Turned quotation mark glyphs are more polished", 171 text, "This needs to \u201chappen\u201d today") 172 if "..." in text: 173 error(child, "Ellipsis glyph is more polished", 174 text, "Loading\u2026") 175 if "wi-fi" in text.lower(): 176 error(child, "Non-breaking glyph is more polished", 177 text, "Wi\u2011Fi") 178 if "wifi" in text.lower(): 179 error(child, "Using non-standard spelling", 180 text, "Wi\u2011Fi") 181 if re.search("\d-\d", text): 182 warn(child, "Ranges should use en dash glyph", 183 text, "You will find this material in chapters 8\u201312") 184 if "--" in text: 185 warn(child, "Phrases should use em dash glyph", 186 text, "Upon discovering errors\u2014all 124 of them\u2014they recalled.") 187 if ". " in text: 188 warn(child, "Only use single space between sentences", 189 text, "First idea. Second idea.") 190 if re.match(r"^[A-Z\s]{5,}$", text): 191 warn(child, "Actions should use android:textAllCaps in layout; ignore if acronym", 192 text, "Refresh data") 193 if " phone " in text and "product" not in child.attrib: 194 warn(child, "Strings mentioning phones should have variants for tablets", 195 text, None) 196 197 # When more than one substitution, require indexes 198 if len(re.findall("%[^%]", text)) > 1: 199 if len(re.findall("%[^\d]", text)) > 0: 200 error(child, "Substitutions must be indexed", 201 text, "Add %1$s to %2$s") 202 203 # Require xliff substitutions 204 for gc in child.iter(): 205 badsub = False 206 if gc.tail and re.search("%[^%]", gc.tail): badsub = True 207 if re.match("{.*xliff.*}g", gc.tag): 208 if "id" not in gc.attrib: 209 error(child, "Substitutions must define id attribute", 210 None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>") 211 if "example" not in gc.attrib: 212 error(child, "Substitutions must define example attribute", 213 None, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>") 214 else: 215 if gc.text and re.search("%[^%]", gc.text): badsub = True 216 if badsub: 217 error(child, "Substitutions must be inside xliff tags", 218 text, "<xliff:g id=\"domain\" example=\"example.com\">%1$s</xliff:g>") 219 220 return warnings 221 222if len(sys.argv) > 2: 223 before = lint(sys.argv[2]) 224else: 225 before = {} 226after = lint(sys.argv[1]) 227 228for b in before: 229 if b in after: 230 del after[b] 231 232if len(after) > 0: 233 for a in sorted(after.keys()): 234 print after[a] 235 print 236 sys.exit(1) 237