#! /usr/bin/python

# Copyright (C) 2009-2012, International Business Machines Corporation, Google and Others.
# All rights reserved.

#
#  Script to check and fix svn property settings for CLDR source files.
#  This script is a modified version of ICU's icu-svnprops-check.py.
#  Also check for the correct line endings on files with svn:eol-style = native
#
#  THIS SCRIPT DOES NOT WORK ON WINDOWS
#     It only works correctly on platforms where the native line ending is a plain \n
#
#  usage:
#     cldr-svnprops-check.py  [options]
#
#  options:
#     -f | --fix     Fix any problems that are found
#     -h | --help    Print a usage line and exit.
#
#  The tool operates recursively on the directory from which it is run.
#  Only files from the svn repository are checked.
#  No changes are made to the repository; only the working copy will be altered.

import sys
import os
import os.path
import re
import getopt

#
#  svn autoprops definitions.
#      The text below is pasted from the ICU recommended auto-props at
#            http://icu-project.org/docs/subversion_howto/index.html
#
#  It is parsed at run time, and every repository file whose name matches one
#  of the patterns is verified to carry the recommended properties.
#
#  NOTE(review): the *.rtf and *.pdf entries say "mime-type", not
#  "svn:mime-type" like every other entry; they are kept exactly as pasted.
#  Confirm against the upstream recommendation before changing them.
#
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
### file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?'). All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""


# file_types:  The parsed form of the svn auto-props specification.
#              A list with one entry per file type - .cc, .cpp, .txt, etc.
#              Each entry pairs a "type" with a prop list.
#              "type" is a regular expression string that will match a file name.
#              The prop list is another list, one element per property.
#              Each property item is a two element tuple, (prop name, prop value)
file_types = list()


#
#  Parse an svn auto-props specification and append the result to file_types.
#    auto_props:  text of an [auto-props] config section.  When omitted (the
#                 normal case) the module-level svn_auto_props string is used.
#    Comment lines, blank lines and the [auto-props] header are skipped;
#    lines that do not look like "<file-type> = ..." are reported and skipped.
#
def parse_auto_props(auto_props=None):
    if auto_props is None:
        auto_props = svn_auto_props

    for propline in auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):          # Match comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):   # Match the [auto-props] line.
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):   # minimal syntax check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # Transform the file type expression from autoprops into a normal regular
        # expression, e.g.  "*.cpp"  ==>  ".*\.cpp$"
        # Everything is escaped first so that only the two svn wildcards,
        # '*' and '?', keep a special meaning.
        file_type = re.escape(file_type.strip())
        file_type = file_type.replace(r"\*", ".*").replace(r"\?", ".")
        file_type = file_type + "$"

        # example string_proplist at this point: " svn:eol-style=native;svn:executable"
        # split on ';' into a list of properties.  The negative lookahead and lookbehind
        # in the split regexp are to prevent matching on ';;', which is an escaped ';'
        # within a property value.
        proplist = list()
        for prop in re.split(r"(?<!;);(?!;)", string_proplist):
            if "=" in prop:
                prop_name, prop_val = prop.split("=", 1)
            else:
                # properties with no explicit value, e.g. svn:executable
                prop_name, prop_val = prop, ""
            prop_name = prop_name.strip()
            if not prop_name:
                # A stray ';' (e.g. at the end of the line) yields an empty
                # entry; there is no property to check for it.
                continue
            # unescape any ";;" in a property value, e.g. the mime-type from
            #    *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        file_types.append((file_type, proplist))


#
#  Run an external command and return everything it wrote to stdout, as text.
#    cmd:  either a list/tuple of arguments, run directly with no shell so that
#          file names need no quoting, or a single string, run through the shell.
#    If the command cannot be started or exits with a non-zero status, a
#    message is written to stderr and this script exits with a non-zero status.
#
def runCommand(cmd):
    import subprocess   # local import: the file-level import list does not include it

    is_arg_list = isinstance(cmd, (list, tuple))
    cmd_text = " ".join(cmd) if is_arg_list else cmd
    try:
        proc = subprocess.Popen(cmd, shell=not is_arg_list,
                                stdout=subprocess.PIPE, universal_newlines=True)
        output_text = proc.communicate()[0]
    except OSError as err:
        sys.stderr.write('" %s " failed (%s). Exiting.\n' % (cmd_text, err))
        sys.exit(1)
    if proc.returncode:
        sys.stderr.write('" %s " failed. Exiting.\n' % cmd_text)
        # A negative return code means "killed by signal"; never let the
        # value wrap around to an exit status of 0.
        sys.exit(proc.returncode if 0 < proc.returncode < 256 else 1)
    return output_text


#
#  Print a one line usage message.
#
def usage():
    print("usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]")


#
#  UTF-8 file check.  For text files, add a charset to the mime-type if their contents are UTF-8
#    file_name:        name of a text file.
#    base_mime_type:   svn:mime-type property value from the auto-props file (no charset= part)
#    actual_mime_type: existing svn:mime-type property value for the file.
#    return:           svn:mime-type property value, with charset added when appropriate.
#
def check_utf8(file_name, base_mime_type, actual_mime_type):

    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    # Read raw bytes; the point is to find out what the encoding is.
    with open(file_name, "rb") as f:
        data = f.read()

    try:
        data.decode("ascii")
        # pure ASCII (this includes the empty file): no charset needed.
        return base_mime_type
    except UnicodeDecodeError:
        pass

    try:
        data.decode("utf-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    if data.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with BOM: " + file_name)

    # Append charset=utf-8.
    return base_mime_type + ';charset=utf-8'


#  True if the file contains at least one DOS (CR LF) line ending.
def _has_crlf(file_name):
    with open(file_name, "rb") as f:
        return b"\r\n" in f.read()


#  Rewrite the file in place with every CR LF line ending replaced by a plain LF.
def _strip_crlf(file_name):
    with open(file_name, "rb") as f:
        data = f.read()
    with open(file_name, "wb") as f:
        f.write(data.replace(b"\r\n", b"\n"))


#  Run "svn propset" on one working copy file; a failure is reported, not fatal.
def _svn_propset(propname, propval, file_name):
    import subprocess   # local import: the file-level import list does not include it

    try:
        status = subprocess.call(["svn", "propset", propname, propval, file_name])
    except OSError as err:
        status = err
    if status:
        sys.stderr.write("warning: svn propset %s failed for %s\n" % (propname, file_name))


#  Check, and optionally fix, a single expected property of a single file.
def _check_property(file_name, propname, propval, fix_problems):
    actual_propval = runCommand(["svn", "propget", "--strict", propname, file_name])
    if propname == "svn:mime-type" and propval.startswith("text/"):
        # check for UTF-8 text files, should have svn:mime-type=text/something;charset=utf-8
        propval = check_utf8(file_name, propval, actual_propval)

    # svn reports "*" for properties that were set without a value, e.g. svn:executable
    is_ok = propval == actual_propval or (propval == "" and actual_propval == "*")
    if not is_ok:
        print("svn propset %s '%s' '%s'" % (propname, propval, file_name))
        if fix_problems:
            _svn_propset(propname, propval, file_name)

    if propname == "svn:eol-style" and propval == "native":
        if _has_crlf(file_name):
            if fix_problems:
                print(file_name + ": Removing DOS CR characters.")
                _strip_crlf(file_name)
            else:
                print(file_name + " contains DOS CR characters.")


#
#  Command line entry point.
#    argv:  the command line arguments, without the program name.
#    Lists every file known to svn below the current directory and compares
#    its properties and line endings with the auto-props recommendations.
#    Problems are printed; with -f / --fix they are also corrected in the
#    working copy.  Exits with status 2 on a command line error.
#
def main(argv):
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # The exception names the offending option; argv[0] may be a valid one.
        print(str(err))
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    parse_auto_props()
    file_list = runCommand(["svn", "ls", "-R"]).splitlines()

    for f in file_list:
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        # auto-props patterns describe file names, so match on the last path
        # component; otherwise "Makefile" would only match in the top directory.
        base_name = os.path.basename(f)
        for file_pattern, props in file_types:
            if not re.match(file_pattern, base_name):
                continue
            for propname, propval in props:
                _check_property(f, propname, propval, fix_problems)


if __name__ == "__main__":
    main(sys.argv[1:])