1#!/usr/bin/env python
2
3from __future__ import print_function
4import sys, os, re, difflib, unicodedata, errno, cgi
5from itertools import *
6
# Line-prefix characters used in diff output.  ZipDiffer.diff_files tags a
# line coming from the i-th input file with diff_symbols[i].
diff_symbols = "-+=*&^%$#@!~/"
# Highlight color names; DiffColorizer uses diff_colors[i] for lines whose
# prefix is diff_symbols[i].
diff_colors = ['red', 'green', 'blue']

# Python 3 has no unichr builtin; alias it to chr so the rest of the module
# can use a single name on both major versions.
if sys.version_info[0] >= 3:
	unichr = chr
12
class ColorFormatter:
	"""Namespace for the output formatters used when highlighting diffs.

	Every nested formatter exposes the same four static methods:
	start_color (c), end_color (), escape (s) and newline ().
	"""

	class Null:
		"""Formatter that emits plain text without any highlighting."""
		@staticmethod
		def start_color (c): return ''
		@staticmethod
		def end_color (): return ''
		@staticmethod
		def escape (s): return s
		@staticmethod
		def newline (): return '\n'

	class ANSI:
		"""Formatter that highlights with ANSI terminal escape sequences."""
		@staticmethod
		def start_color (c):
			# Raises KeyError for any color name other than the three below.
			return {
				'red': '\033[41;37;1m',
				'green': '\033[42;37;1m',
				'blue': '\033[44;37;1m',
			}[c]
		@staticmethod
		def end_color ():
			return '\033[m'
		@staticmethod
		def escape (s): return s
		@staticmethod
		def newline (): return '\n'

	class HTML:
		"""Formatter that highlights with HTML <span> elements."""
		@staticmethod
		def start_color (c):
			return '<span style="background:%s">' % c
		@staticmethod
		def end_color ():
			return '</span>'
		@staticmethod
		def escape (s):
			# cgi.escape no longer exists on current Python 3; html.escape
			# with quote=False escapes the same three characters (& < >).
			# Fall back to cgi on Python 2, which has no html module.
			try:
				from html import escape as html_escape
			except ImportError:
				from cgi import escape as html_escape
			return html_escape (s, quote=False)
		@staticmethod
		def newline (): return '<br/>\n'

	@staticmethod
	def Auto (argv = None, out = sys.stdout):
		"""Pick a formatter class from command-line flags.

		Recognized flags are removed from argv in place (first occurrence
		only).  When several are present, the one listed last in the table
		below wins.  Without any flag the ANSI formatter is returned.
		The out parameter is accepted but not used.
		"""
		if argv is None:
			argv = []
		# Order matters: later entries override earlier ones.
		choices = (
			("--format", ColorFormatter.ANSI),
			("--format=ansi", ColorFormatter.ANSI),
			("--format=html", ColorFormatter.HTML),
			("--no-format", ColorFormatter.Null),
		)
		chosen = ColorFormatter.ANSI
		for flag, formatter in choices:
			if flag in argv:
				argv.remove (flag)
				chosen = formatter
		return chosen
69
70
class DiffColorizer:
	"""Highlights the tokens that differ between paired lines of a diff.

	formatter must provide start_color (c), end_color (), escape (s) and
	newline ().  colors and symbols are indexed in parallel: a line that
	starts with symbols[i] is highlighted using colors[i].
	"""

	# Tokenizer: a (possibly empty) run of word characters followed by at
	# most one non-word character.  Both letter cases are listed so that
	# mixed-case words stay a single token instead of being split at every
	# capital letter.
	diff_regex = re.compile ('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

	def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
		self.formatter = formatter
		self.colors = colors
		self.symbols = symbols

	def colorize_lines (self, lines):
		"""Return the two given lines with their differing tokens highlighted.

		lines is a pair; a missing side may be None and is treated as ''.
		The result is a list with one formatted string per non-empty side,
		each prefixed with its symbol and ended by formatter.newline ().
		"""
		lines = (l if l else '' for l in lines)
		# Put every token on its own line so difflib can compare token lists.
		ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
		oo = ["",""]          # output accumulated for each side
		st = [False, False]   # whether a color span is currently open per side
		for l in difflib.Differ().compare (*ss):
			if l[0] == '?':
				# Differ's intraline hint lines carry no token text.
				continue
			if l[0] == ' ':
				# Common token: close any open highlight, then emit on both sides.
				for i in range(2):
					if st[i]:
						oo[i] += self.formatter.end_color ()
						st[i] = False
				oo = [o + self.formatter.escape (l[2:]) for o in oo]
				continue
			if l[0] in self.symbols:
				# Token unique to one side: emit it inside a highlight span.
				i = self.symbols.index (l[0])
				if not st[i]:
					oo[i] += self.formatter.start_color (self.colors[i])
					st[i] = True
				oo[i] += self.formatter.escape (l[2:])
				continue
		for i in range(2):
			if st[i]:
				oo[i] += self.formatter.end_color ()
				st[i] = False
		# Drop the newlines the tokenizer inserted between tokens.
		oo = [o.replace ('\n', '') for o in oo]
		return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]

	def colorize_diff (self, f):
		"""Yield the lines of diff f with paired changed lines highlighted.

		Lines whose first character is not one of self.symbols are escaped
		and yielded immediately.  Symbol lines are buffered per side and
		flushed through colorize_lines once both sides are present, when a
		side would be overwritten, or at end of input.
		"""
		lines = [None, None]
		for l in f:
			if l[0] not in self.symbols:
				yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
				continue
			i = self.symbols.index (l[0])
			if lines[i]:
				# This side is already buffered: flush before replacing it.
				for line in self.colorize_lines (lines):
					yield line
				lines = [None, None]
			lines[i] = l[1:]
			if (all (lines)):
				# Both sides available: flush the pair.
				for line in self.colorize_lines (lines):
					yield line
				lines = [None, None]
		if (any (lines)):
			# Flush whatever single side is left at end of input.
			for line in self.colorize_lines (lines):
				yield line
131
132
class ZipDiffer:
	"""Line-by-line comparison of several inputs read in lockstep."""

	@staticmethod
	def diff_files (files, symbols=diff_symbols):
		"""Write a positional diff of files to standard output.

		files is an iterable of line iterables.  The n-th lines of all
		inputs are compared: when they are all equal the line is written
		once with a ' ' prefix; otherwise each non-empty line is written
		prefixed with symbols[i], i being the index of its input.  Inputs
		that run out early contribute nothing for the remaining lines.

		An IOError other than EPIPE is reported on stderr and terminates
		the process with status 1; EPIPE is silently ignored.
		"""
		# izip_longest was renamed to zip_longest in Python 3.
		try:
			from itertools import zip_longest
		except ImportError:
			from itertools import izip_longest as zip_longest

		files = tuple (files) # in case it's a generator, copy it
		try:
			for lines in zip_longest (*files):
				if all (lines[0] == line for line in lines[1:]):
					sys.stdout.writelines ([" ", lines[0]])
					continue

				for i, l in enumerate (lines):
					if l:
						sys.stdout.writelines ([symbols[i], l])
		except IOError as e:
			if e.errno != errno.EPIPE:
				print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
				sys.exit (1)
151
152
class DiffFilters:
	"""Filters that pass through only part of a diff stream."""

	@staticmethod
	def filter_failures (f):
		"""Yield the lines of every test case in f that did not pass."""
		for _, group in DiffHelpers.separate_test_cases (f):
			# The group is a one-shot iterator; keep a copy so it can be
			# both judged and re-emitted.
			case = list (group)
			if DiffHelpers.test_passed (case):
				continue
			for line in case:
				yield line
161
class Stat:
	"""Running tally of tests: how many were added and their summed freq."""

	def __init__ (self):
		self.count, self.freq = 0, 0

	def add (self, test):
		"""Account for one more test, adding its freq attribute to the total."""
		self.count = self.count + 1
		self.freq = self.freq + test.freq
171
class Stats:
	"""Pass/fail tallies for a set of tests, with simple statistics on them."""

	def __init__ (self):
		self.passed = Stat ()
		self.failed = Stat ()
		self.total  = Stat ()

	def add (self, test):
		"""Record test in the total and in the passed or failed tally."""
		self.total.add (test)
		if test.passed:
			self.passed.add (test)
		else:
			self.failed.add (test)

	def mean (self):
		"""Return the fraction of tests that passed (0.0 when none were added)."""
		if not self.total.count:
			return 0.
		return float (self.passed.count) / self.total.count

	def variance (self):
		"""Return pass-rate times fail-rate (0.0 when no tests were added)."""
		if not self.total.count:
			return 0.
		return (float (self.passed.count) / self.total.count) * \
		       (float (self.failed.count) / self.total.count)

	def stddev (self):
		"""Return the square root of variance ()."""
		return self.variance () ** .5

	def zscore (self, population):
		"""Calculate the standard score.
		   Population is the Stats for population.
		   Self is Stats for sample.
		   Returns larger absolute value if sample is highly unlikely to be random.
		   Anything outside of -3..+3 is very unlikely to be random.
		   Returns 0.0 when the population has no spread (everything passed,
		   everything failed, or it is empty), since the score is undefined.
		   See: http://en.wikipedia.org/wiki/Standard_score"""

		sigma = population.stddev ()
		if not sigma:
			# Avoid dividing by zero; no sample can stand out from a
			# population whose outcomes are all identical.
			return 0.
		return (self.mean () - population.mean ()) / sigma
205
206
207
208
class DiffSinks:
	"""Consumers that summarize a diff stream on standard output."""

	@staticmethod
	def print_stat (f):
		"""Print how many of the test cases found in f passed and failed."""
		passed = 0
		failed = 0
		# XXX port to Stats, but that would really slow us down here
		for key, lines in DiffHelpers.separate_test_cases (f):
			if DiffHelpers.test_passed (lines):
				passed += 1
			else:
				failed += 1
		total = passed + failed
		# Empty input has no failure rate to compute; report 0% instead of
		# dividing by zero.
		percent = 100. * failed / total if total else 0.
		print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, percent))

	@staticmethod
	def print_ngrams (f, ns=(1,2,3)):
		"""Print a z-score line for each frequently failing codepoint n-gram.

		For every n in ns, the n-grams of each test's unicodes are tallied
		by pass/fail.  Only n-grams seen in at least 30 failed tests are
		reported, scored against the statistics of all tests in f.
		"""
		gens = tuple (Ngram.generator (n) for n in ns)
		allstats = Stats ()
		allgrams = {}
		for key, lines in DiffHelpers.separate_test_cases (f):
			test = Test (lines)
			allstats.add (test)

			# A test without a <...> line has unicodes == None: no n-grams.
			unicodes = test.unicodes or []
			for gen in gens:
				for ngram in gen (unicodes):
					if ngram not in allgrams:
						allgrams[ngram] = Stats ()
					allgrams[ngram].add (test)

		# dict.items () exists on both Python 2 and 3 (iteritems is 2-only).
		importantgrams = {}
		for ngram, stats in allgrams.items ():
			if stats.failed.count >= 30: # for statistical reasons
				importantgrams[ngram] = stats
		allgrams = importantgrams
		del importantgrams

		for ngram, stats in allgrams.items ():
			print ("zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram)))
248
249
250
class Test:
	"""One test case parsed from the diff lines that belong to it.

	Each line starts with a diff symbol, optionally followed by
	"identifier: ", then a payload wrapped in brackets that select the
	attribute it fills: (text), <unicodes> or [glyphs].  The test counts
	as passed only if every line starts with a space.
	"""

	def __init__ (self, lines):
		self.freq = 1
		self.passed = True
		self.identifier = None
		self.text = None
		self.unicodes = None
		self.glyphs = None
		for l in lines:
			if not l:
				continue
			symbol = l[0]
			if symbol != ' ':
				self.passed = False
			i = 1
			if ':' in l:
				i = l.index (':')
				if not self.identifier:
					self.identifier = l[1:i]
				i = i + 2 # Skip colon and space
			# j indexes the closing bracket: the last character, or the
			# one before a trailing newline.
			j = -1
			if l[j] == '\n':
				j -= 1
			if i >= len (l) + j:
				# Too short to hold an opening and a closing bracket.
				continue
			brackets = l[i] + l[j]
			# Slice up to j (not a fixed -2) so a final line without a
			# trailing newline keeps its last payload character.
			l = l[i+1:j]
			if brackets == '()':
				self.text = l
			elif brackets == '<>':
				self.unicodes = Unicode.parse (l)
			elif brackets == '[]':
				# XXX we don't handle failed tests here
				self.glyphs = l
282
283
class DiffHelpers:
	"""Helpers for grouping diff lines into test cases and judging them."""

	@staticmethod
	def separate_test_cases (f):
		'''Reads lines from f, and if the lines have identifiers, ie.
		   have a colon character, groups them by identifier,
		   yielding (identifier, lines) pairs for each run of consecutive
		   lines with the same identifier.  A line without a colon is its
		   own key.'''

		def identifier (l):
			# Index 0 holds the diff symbol, so look for the colon from
			# index 1 both when testing and when slicing.
			colon = l.find (':', 1)
			if colon >= 0:
				return l[1:colon]
			return l
		return groupby (f, key=identifier)

	@staticmethod
	def test_passed (lines):
		"""Return True if the test case made of lines counts as passed.

		A case passes when every line starts with a space, or when any
		'+' line contains one of the marker strings below.
		"""
		lines = list (lines)
		# XXX This is a hack, but does the job for now.
		# NOTE(review): presumably these names mark output that used a
		# missing or placeholder glyph -- confirm.
		markers = ("space+0|space+0", "uni25CC", "dottedcircle", "glyph0", "gid0", "notdef")
		for l in lines:
			if l.startswith ('+') and any (m in l for m in markers):
				return True
		return all (l.startswith (' ') for l in lines)
309
310
class FilterHelpers:
	"""Adapters that turn a line filter into a sink writing to stdout."""

	@staticmethod
	def filter_printer_function (filter_callback):
		"""Return a function that prints each item filter_callback yields.

		Every item goes through print (), so each gets a newline appended.
		"""
		def printer (f):
			filtered = filter_callback (f)
			for item in filtered:
				print (item)
		return printer

	@staticmethod
	def filter_printer_function_no_newline (filter_callback):
		"""Return a function that writes each item filter_callback yields.

		Items are written to sys.stdout unchanged, with nothing appended.
		"""
		def printer (f):
			filtered = filter_callback (f)
			for chunk in filtered:
				sys.stdout.write (chunk)
		return printer
326
327
class Ngram:
	"""Factory for sliding-window n-gram generators."""

	@staticmethod
	def generator (n):
		"""Return a generator function producing the n-grams of an iterable.

		The returned function yields every run of n consecutive items as a
		tuple, and carries the window size in its attribute n.
		"""

		def gen (f):
			window = []
			for item in f:
				window.append (item)
				if len (window) != n:
					continue
				yield tuple (window)
				# Slide forward by dropping the oldest item.
				del window[0]

		gen.n = n
		return gen
343
344
class UtilMains:
	"""Command-line entry-point helpers that read their input from sys.argv.

	All three drivers print a usage line and exit with status 1 when
	"--help" is among the arguments, and share the same IOError policy
	(see _handle_io_error).
	"""

	@staticmethod
	def _handle_io_error (e):
		"""Report IOError e on stderr and exit with status 1, unless it is EPIPE."""
		# EPIPE just means the reader of our output went away; stay quiet.
		if e.errno != errno.EPIPE:
			print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
			sys.exit (1)

	@staticmethod
	def process_multiple_files (callback, mnemonic = "FILE"):
		"""Call callback with an open file for each file named on the command line.

		With no arguments, standard input ('-') is processed.  Each file
		is closed once callback returns; standard input is left open.
		"""

		if "--help" in sys.argv:
			print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
			sys.exit (1)

		try:
			files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
			for s in files:
				f = FileHelpers.open_file_or_stdin (s)
				try:
					callback (f)
				finally:
					if f is not sys.stdin:
						f.close ()
		except IOError as e:
			UtilMains._handle_io_error (e)

	@staticmethod
	def process_multiple_args (callback, mnemonic):
		"""Call callback with each command-line argument in turn.

		Prints the usage line and exits when there are no arguments.
		"""

		if len (sys.argv) == 1 or "--help" in sys.argv:
			print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
			sys.exit (1)

		try:
			for s in sys.argv[1:]:
				callback (s)
		except IOError as e:
			UtilMains._handle_io_error (e)

	@staticmethod
	def filter_multiple_strings_or_stdin (callback, mnemonic, \
					      separator = " ", \
					      concat_separator = False):
		"""Print callback applied to the arguments, or to each line of stdin.

		With no arguments, every line of standard input (minus its
		trailing newline) is passed to callback and the result printed.
		Otherwise callback is applied to each argument and the results
		are printed on one line joined by separator.  If concat_separator
		is not False, the arguments are first joined with it into a
		single string.
		"""

		if "--help" in sys.argv:
			print ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
			      % (sys.argv[0], mnemonic, sys.argv[0]))
			sys.exit (1)

		try:
			if len (sys.argv) == 1:
				# readline () returns '' only at end of input.
				for line in iter (sys.stdin.readline, ''):
					if line.endswith ('\n'):
						line = line[:-1]
					print (callback (line))
			else:
				args = sys.argv[1:]
				# Compare against False explicitly: an empty string is a
				# valid separator, so truthiness cannot be used here.
				if concat_separator != False:
					args = [concat_separator.join (args)]
				print (separator.join (callback (x) for x in (args)))
		except IOError as e:
			UtilMains._handle_io_error (e)
406
407
class Unicode:
	"""Conversions between text, U+XXXX codepoint lists and character names.

	On Python 2 the string-returning methods return UTF-8 encoded byte
	strings; on Python 3 they return str.
	"""

	@staticmethod
	def decode (s):
		"""Return s as a comma-separated list of U+XXXX codepoints.

		s may be a UTF-8 byte string or an already decoded text string.
		"""
		# On Python 2 bytes is str, so this decodes ordinary strings there.
		if isinstance (s, bytes):
			s = s.decode ('utf-8')
		s = u','.join ("U+%04X" % ord (u) for u in s)
		if sys.version_info[0] == 2: s = s.encode ('utf-8')
		return s

	@staticmethod
	def parse (s):
		"""Return the list of integers for the hex codepoints written in s.

		"0x" prefixes and the characters < + > { } , ; & # \\ x X u U n N
		i I, newline and tab are all treated as separators.
		"""
		s = re.sub (r"0[xX]", " ", s)
		s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n	]", " ", s)
		return [int (x, 16) for x in s.split ()]

	@staticmethod
	def encode (s):
		"""Return the text made of the codepoints written in s (see parse)."""
		s = u''.join (unichr (x) for x in Unicode.parse (s))
		if sys.version_info[0] == 2: s = s.encode ('utf-8')
		return s

	# Abbreviations substituted for full character names by pretty_name.
	shorthands = {
		"ZERO WIDTH NON-JOINER": "ZWNJ",
		"ZERO WIDTH JOINER": "ZWJ",
		"NARROW NO-BREAK SPACE": "NNBSP",
		"COMBINING GRAPHEME JOINER": "CGJ",
		"LEFT-TO-RIGHT MARK": "LRM",
		"RIGHT-TO-LEFT MARK": "RLM",
		"LEFT-TO-RIGHT EMBEDDING": "LRE",
		"RIGHT-TO-LEFT EMBEDDING": "RLE",
		"POP DIRECTIONAL FORMATTING": "PDF",
		"LEFT-TO-RIGHT OVERRIDE": "LRO",
		"RIGHT-TO-LEFT OVERRIDE": "RLO",
	}

	@staticmethod
	def pretty_name (u):
		"""Return a short display name for the single character u.

		The Unicode name is stripped of everything up to " LETTER ",
		" SIGN " or " COMBINING "; vowel signs become "<name>-MATRA",
		viramas become "HALANT", and names listed in shorthands are
		abbreviated.  Characters without a Unicode name give "XXX".
		"""
		try:
			s = unicodedata.name (u)
		except ValueError:
			return "XXX"
		s = re.sub (".* LETTER ", "", s)
		s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
		s = re.sub (".* SIGN ", "", s)
		s = re.sub (".* COMBINING ", "", s)
		# Names of the form "<SCRIPT> SIGN VIRAMA" have been reduced to the
		# bare word "VIRAMA" by now, so the prefix has to be optional.
		if re.match ("(.* )?VIRAMA", s):
			s = "HALANT"
		if s in Unicode.shorthands:
			s = Unicode.shorthands[s]
		return s

	@staticmethod
	def pretty_names (s):
		"""Return the pretty names of the codepoints in s, joined by " + "."""
		s = re.sub (r"[<+>\\uU]", " ", s)
		s = re.sub (r"0[xX]", " ", s)
		s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
		s = u' + '.join (Unicode.pretty_name (x) for x in s)
		# Same convention as encode (): bytes on Python 2 only.
		if sys.version_info[0] == 2: s = s.encode ('utf-8')
		return s
462
463
class FileHelpers:
	"""Small helpers for opening input files."""

	@staticmethod
	def open_file_or_stdin (f):
		"""Return sys.stdin if f is '-', otherwise the file f opened for reading.

		The caller is responsible for closing the returned file.
		"""
		if f == '-':
			return sys.stdin
		# open () works on Python 2 and 3; the file () builtin is 2-only.
		return open (f)
471
472
class Manifest:
	"""Reading and generating the MANIFEST files that list a directory's entries."""

	@staticmethod
	def read (s, strict = True):
		"""Yield the paths of all files reachable from s through MANIFEST files.

		If s is a file, its normalized path is yielded.  If s is a
		directory, each entry listed in its MANIFEST file is read
		recursively.  When s or a MANIFEST is missing, strict decides
		between printing an error and exiting with status 1 (True) or
		silently yielding nothing (False).
		"""

		if not os.path.exists (s):
			if strict:
				print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
				sys.exit (1)
			return

		s = os.path.normpath (s)

		if not os.path.isdir (s):
			yield s
			return

		manifest = os.path.join (s, "MANIFEST")
		try:
			with open (manifest) as m:
				items = [x.strip () for x in m]
		except IOError:
			if strict:
				print ("%s: %s does not exist" % (sys.argv[0], manifest), file=sys.stderr)
				sys.exit (1)
			return

		for f in items:
			if not f:
				# A blank line would join back to s itself and recurse forever.
				continue
			# NOTE(review): strict is not forwarded, so nested entries are
			# always read strictly -- confirm whether that is intended.
			for p in Manifest.read (os.path.join (s, f)):
				yield p

	@staticmethod
	def update_recursive (s):
		"""Write a MANIFEST file in s and in every directory below it.

		Each MANIFEST lists the directory's files, then its
		subdirectories, both sorted, one per line.  Entries named
		MANIFEST, README, LICENSE, COPYING, AUTHORS, SOURCES or ChangeLog
		are left out, and subdirectories with those names are not visited.
		"""

		# os.walk already descends into every directory left in dirnames,
		# so each directory is generated exactly once without recursing here.
		for dirpath, dirnames, filenames in os.walk (s, followlinks=True):

			for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
				if f in dirnames:
					dirnames.remove (f)
				if f in filenames:
					filenames.remove (f)
			dirnames.sort ()
			filenames.sort ()
			ms = os.path.join (dirpath, "MANIFEST")
			print ("  GEN    %s" % ms)
			with open (ms, "w") as m:
				for f in filenames:
					print (f, file=m)
				for f in dirnames:
					print (f, file=m)
523
# This module only provides helpers for other scripts to import; running it
# directly does nothing.
if __name__ == '__main__':
	pass
526