#! /usr/bin/python

# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

# Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
# All rights reserved.

#
#  Script to check and fix svn property settings for ICU source files.
#  Also check for the correct line endings on files with svn:eol-style = native
#
#  THIS SCRIPT DOES NOT WORK ON WINDOWS
#     It only works correctly on platforms where the native line ending is a plain \n
#
#  usage:
#     icu-svnprops-check.py  [options]
#
#  options:
#     -f | --fix     Fix any problems that are found
#     -h | --help    Print a usage line and exit.
#
#  The tool operates recursively on the directory from which it is run.
#  Only files from the svn repository are checked.
#  No changes are made to the repository; only the working copy will be altered.
#
#  The code is written to run unchanged under Python 2.7 and Python 3.

import sys
import os
import os.path
import re
import getopt
import subprocess

#
#  svn autoprops definitions.
#      Copy and paste here the ICU recommended auto-props from
#      http://icu-project.org/docs/subversion_howto/index.html
#
#  This program will parse this autoprops string, and verify that files in
#  the repository have the recommended properties set.
#
#  NOTE(review): the *.rtf and *.pdf entries below name the property
#  "mime-type" rather than "svn:mime-type".  That looks like a typo in the
#  pasted definitions, but they are kept verbatim here - confirm against the
#  upstream document before changing them.
#
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
###   file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?').  All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""


# file_types:  The parsed form of the svn auto-props specification.
#              A list of file types - .cc, .cpp, .txt, etc.
#              each element is a (type, proplist) pair
#              "type" is a regular expression string that will match a file's base name
#              prop list is another list, one element per property.
#              Each property item is a two element tuple, (prop name, prop value)
file_types = list()


def _pattern_to_regex(file_type):
    """Translate an auto-props file-name pattern into a regular expression string.

    '*' becomes '.*', '?' becomes '.', and every other character is escaped so
    that it only matches itself.  e.g.  "*.cpp"  ==>  ".*\\.cpp$"
    """
    pieces = []
    for ch in file_type:
        if ch == "*":
            pieces.append(".*")
        elif ch == "?":
            pieces.append(".")
        else:
            pieces.append(re.escape(ch))
    return "".join(pieces) + "$"


def parse_auto_props():
    """Parse svn_auto_props and (re)populate the module-level file_types list.

    Comment lines, blank lines and the [auto-props] header are skipped.
    Lines that do not look like "<file-type> = ..." are reported and skipped.
    Calling this function again rebuilds file_types instead of appending
    duplicate entries.
    """
    del file_types[:]
    for propline in svn_auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):            # Match comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):     # Match the [auto-props] line.
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):     # minimal syntax check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # transform the file type expression from autoprops into a normal regular expression.
        file_type = _pattern_to_regex(file_type.strip())

        # example string_proplist at this point: " svn:eol-style=native;svn:executable"
        # split on ';' into a list of properties.  The negative lookahead and lookbehind
        # in the split regexp are to prevent matching on ';;', which is an escaped ';'
        # within a property value.
        proplist = []
        for prop in re.split("(?<!;);(?!;)", string_proplist):
            # properties with no explicit value, e.g. svn:executable, get the value ""
            prop_name, _sep, prop_val = prop.partition("=")
            prop_name = prop_name.strip()
            if not prop_name:
                # a stray ';' would otherwise produce a property with an empty name
                continue
            # unescape any ";;" in a property value, e.g. the mime-type from
            #    *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        file_types.append((file_type, proplist))


def runCommand(cmd):
    """Run a command and return everything it wrote to stdout, as text.

    cmd:     either a shell command string (run through the shell, as before)
             or a list of arguments (run directly, so file names containing
             spaces or shell metacharacters are passed through untouched).
    return:  the command's standard output.

    If the command fails, a message is written to stderr and the script exits
    with a non-zero status.
    """
    use_shell = not isinstance(cmd, (list, tuple))
    proc = subprocess.Popen(cmd, shell=use_shell, stdout=subprocess.PIPE,
                            universal_newlines=True)
    output_text = proc.communicate()[0]
    if proc.returncode:
        shown = cmd if use_shell else " ".join(cmd)
        sys.stderr.write('"%s" failed.  Exiting.\n' % shown)
        # A negative return code means "killed by a signal"; still exit non-zero.
        sys.exit(proc.returncode if proc.returncode > 0 else 1)
    return output_text


def usage():
    """Print a one-line usage summary."""
    print("usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]")


#
#  UTF-8 file check.   For text files, add a charset to the mime-type if their contents are UTF-8
#    file_name:        name of a text file.
#    base_mime_type:   svn:mime-type property value from the auto-props file
#    actual_mime_type: existing svn:mime-type property value for the file.
#    return:           svn:mime-type property value, with charset added when appropriate.
#
def check_utf8(file_name, base_mime_type, actual_mime_type):
    # If the file already has a charset in its mime-type, don't make any change.
    if actual_mime_type.find("charset=") > 0:
        return actual_mime_type

    # Read raw bytes so the check does not depend on the locale's text encoding.
    with open(file_name, 'rb') as f:
        data = f.read()

    try:
        data.decode("ascii")
        # pure ASCII (this also covers an empty file).
        return base_mime_type
    except UnicodeDecodeError:
        pass

    try:
        data.decode("UTF-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    if not data.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with no BOM: " + file_name)

    # Append charset=utf-8, unless the auto-props value already names a charset
    # (as the *.java entry does); appending again would duplicate the parameter.
    if "charset=" in base_mime_type:
        return base_mime_type
    return base_mime_type + ';charset=utf-8'


def _svn_target(path):
    """Return path in the form svn expects on its command line.

    svn treats the last '@' in a target as the start of a peg revision, so a
    path that contains '@' needs an empty peg revision appended.
    """
    if "@" in path:
        return path + "@"
    return path


def _has_dos_line_endings(path):
    """Return True if the file contains at least one CR LF sequence."""
    with open(path, 'rb') as f:
        return b"\r\n" in f.read()


def _strip_dos_line_endings(path):
    """Rewrite the file in place with every CR LF replaced by a plain LF."""
    with open(path, 'rb') as f:
        data = f.read()
    with open(path, 'wb') as f:
        f.write(data.replace(b"\r\n", b"\n"))


def _check_file(f, fix_problems):
    """Check one working-copy file against every matching auto-props entry.

    Each mismatch is reported as the "svn propset" command that would fix it;
    when fix_problems is true the command is also run, and CR LF line endings
    are removed from files that should have svn:eol-style=native.
    """
    # auto-props patterns apply to the file name, not to the directory part.
    base_name = os.path.basename(f)
    target = _svn_target(f)
    for file_pattern, props in file_types:
        if not re.match(file_pattern, base_name):
            continue
        for propname, propval in props:
            actual_propval = runCommand(["svn", "propget", "--strict", propname, target])
            if propname == "svn:mime-type" and propval.startswith("text/"):
                # check for UTF-8 text files, should have svn:mime-type=text/something; charset=utf8
                propval = check_utf8(f, propval, actual_propval)
            # valueless properties such as svn:executable read back as "*"
            if not (propval == actual_propval or (propval == "" and actual_propval == "*")):
                print("svn propset %s '%s' %s" % (propname, propval, f))
                if fix_problems:
                    subprocess.call(["svn", "propset", propname, propval, target])
            if propname == "svn:eol-style" and propval == "native":
                if _has_dos_line_endings(f):
                    if fix_problems:
                        print(f + ": Removing DOS CR characters.")
                        _strip_dos_line_endings(f)
                    else:
                        print(f + " contains DOS CR characters.")


def main(argv):
    """Command line entry point.

    argv:  the command line arguments, without the program name.

    Lists every file known to svn below the current directory and checks its
    properties.  Exits with status 2 on a command line error.
    """
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # Report what getopt actually objected to, not merely the first argument.
        print(str(err))
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    file_list = runCommand(["svn", "ls", "-R"]).splitlines()

    for f in file_list:
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue
        _check_file(f, fix_problems)


if __name__ == "__main__":
    main(sys.argv[1:])