1#!/usr/bin/env python
2
3from __future__ import print_function, division, absolute_import
4
5import sys, os, re, difflib, unicodedata, errno, cgi
6from itertools import *
7try:
8	import unicodedata2 as unicodedata
9except Exception:
10	pass
11
# Marker characters for diff lines: ZipDiffer prefixes a differing line of
# the i-th input file with diff_symbols[i] (identical lines get ' ' instead).
diff_symbols = "-+=*&^%$#@!~/"
# Highlight colors used by DiffColorizer, indexed in parallel with
# diff_symbols.  ColorFormatter.ANSI only knows these three names.
diff_colors = ['red', 'green', 'blue']
14
def codepoints(s):
	"""Return a lazy iterator over ord() of every item of s, in order.

	This is the plain definition; on narrow Python 2 builds it is replaced
	further down in this file by a surrogate-aware version.
	"""
	return (ord (ch) for ch in s)
17
# Python 2/3 compatibility for unichr().  On Python 2 the self-assignment
# simply rebinds the builtin as a module global; on Python 3 the name does
# not exist, the lookup raises NameError, and the handler at the bottom
# falls back to chr().
try:
	unichr = unichr

	if sys.maxunicode < 0x10FFFF:
		# workarounds for Python 2 "narrow" builds with UCS2-only support.

		# Keep a handle on the builtin before it is shadowed just below.
		_narrow_unichr = unichr

		def unichr(i):
			"""
			Return the unicode character whose Unicode code is the integer 'i'.
			The valid range is 0 to 0x10FFFF inclusive.

			>>> _narrow_unichr(0xFFFF + 1)
			Traceback (most recent call last):
			  File "<stdin>", line 1, in ?
			ValueError: unichr() arg not in range(0x10000) (narrow Python build)
			>>> unichr(0xFFFF + 1) == u'\U00010000'
			True
			>>> unichr(1114111) == u'\U0010FFFF'
			True
			>>> unichr(0x10FFFF + 1)
			Traceback (most recent call last):
			  File "<stdin>", line 1, in ?
			ValueError: unichr() arg not in range(0x110000)
			"""
			try:
				return _narrow_unichr(i)
			except ValueError:
				# The builtin rejected the value: build a "\UXXXXXXXX" escape
				# and let the unicode-escape codec produce the character.
				try:
					padded_hex_str = hex(i)[2:].zfill(8)
					escape_str = "\\U" + padded_hex_str
					return escape_str.decode("unicode-escape")
				except UnicodeDecodeError:
					raise ValueError('unichr() arg not in range(0x110000)')

		# Replaces the module-level codepoints(): recombines UTF-16 surrogate
		# pairs into one code point and yields U+FFFD for unpaired surrogates.
		def codepoints(s):
			# Pending high surrogate (0xD800..0xDBFF) awaiting its low half.
			high_surrogate = None
			for u in s:
				cp = ord (u)
				if 0xDC00 <= cp <= 0xDFFF:
					# Low surrogate: completes a pair, or is a stray.
					if high_surrogate:
						yield 0x10000 + (high_surrogate - 0xD800) * 0x400 + (cp - 0xDC00)
						high_surrogate = None
					else:
						yield 0xFFFD
				else:
					if high_surrogate:
						# The pending high surrogate was never completed.
						yield 0xFFFD
						high_surrogate = None
					if 0xD800 <= cp <= 0xDBFF:
						# Hold back a high surrogate until the next item is seen.
						high_surrogate = cp
					else:
						yield cp
						high_surrogate = None
			# Input ended while a high surrogate was still pending.
			if high_surrogate:
				yield 0xFFFD

except NameError:
	unichr = chr
78
# Python 2/3 compatibility: keep the Python 2 builtin when it exists; on
# Python 3 the lookup raises NameError and 'unicode' becomes an alias of str.
try:
	unicode = unicode
except NameError:
	unicode = str
83
def tounicode(s, encoding='ascii', errors='strict'):
	"""Return s as a text (unicode) string.

	A value that already is a 'unicode' instance is returned untouched;
	anything else is converted by calling its decode() method with the
	given encoding and error-handling policy.
	"""
	if isinstance(s, unicode):
		return s
	return s.decode(encoding, errors)
89
class ColorFormatter:
	"""Namespace of output formatters used when colorizing diffs.

	Each nested class offers the same four static methods:
	start_color(c), end_color(), escape(s) and newline().
	Auto() picks one of them from command-line flags.
	"""

	class Null:
		"""Plain text: no color markup and no escaping."""
		@staticmethod
		def start_color (c): return ''
		@staticmethod
		def end_color (): return ''
		@staticmethod
		def escape (s): return s
		@staticmethod
		def newline (): return '\n'

	class ANSI:
		"""Terminal output using ANSI escape sequences."""
		@staticmethod
		def start_color (c):
			# Colored background with bold white text; raises KeyError for
			# any color name other than the three listed here.
			return {
				'red': '\033[41;37;1m',
				'green': '\033[42;37;1m',
				'blue': '\033[44;37;1m',
			}[c]
		@staticmethod
		def end_color ():
			return '\033[m'
		@staticmethod
		def escape (s): return s
		@staticmethod
		def newline (): return '\n'

	class HTML:
		"""HTML output: colors become <span> backgrounds, text is escaped."""
		@staticmethod
		def start_color (c):
			return '<span style="background:%s">' % c
		@staticmethod
		def end_color ():
			return '</span>'
		@staticmethod
		def escape (s):
			# cgi.escape() no longer exists on Python 3.8+.  html.escape()
			# with quote=False escapes exactly the same characters
			# ('&', '<' and '>').
			try:
				from html import escape as html_escape
			except ImportError:
				# Python 2 has no html module; cgi.escape is still there.
				return cgi.escape (s)
			return html_escape (s, quote=False)
		@staticmethod
		def newline (): return '<br/>\n'

	@staticmethod
	def Auto (argv = None, out = sys.stdout):
		"""Choose a formatter class from command-line flags.

		Recognized flags are removed from argv in place (first occurrence
		only).  When several are present, the one checked last wins; the
		check order is --format, --format=ansi, --format=html, --no-format.
		Without any flag the ANSI formatter is returned.

		argv -- mutable list of arguments; omitted means "no flags".
		out  -- accepted for interface compatibility; not used.
		"""
		if argv is None:
			argv = []
		chosen = ColorFormatter.ANSI
		for flag, formatter in (
			("--format", ColorFormatter.ANSI),
			("--format=ansi", ColorFormatter.ANSI),
			("--format=html", ColorFormatter.HTML),
			("--no-format", ColorFormatter.Null),
		):
			if flag in argv:
				argv.remove (flag)
				chosen = formatter
		return chosen
146
147
class DiffColorizer:
	"""Highlight, token by token, what differs between paired diff lines.

	Only the first two marker symbols take part in the pairing, because
	difflib.Differ compares exactly two sequences.
	"""

	# A run of word characters followed by at most one separator character.
	# (The character classes used to read 'a-za-z', which left uppercase
	# letters out of the word class and split tokens at every capital.)
	diff_regex = re.compile ('([a-zA-Z0-9_]*)([^a-zA-Z0-9_]?)')

	def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
		"""
		formatter -- object providing start_color/end_color/escape/newline
		colors    -- color names, indexed like symbols
		symbols   -- marker characters that may start a diff line
		"""
		self.formatter = formatter
		self.colors = colors
		self.symbols = symbols

	def colorize_lines (self, lines):
		"""Return the colorized output lines for one pair of diff lines.

		lines is a two-item sequence (line for symbols[0], line for
		symbols[1]) without their marker characters; a missing side may be
		None.  Sides that end up empty produce no output line.
		"""
		lines = (l if l else '' for l in lines)
		# Put every word and every separator on a line of its own so that
		# difflib compares tokens instead of whole lines.
		ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
		oo = ["",""]          # output text being built for each side
		st = [False, False]   # whether a color span is currently open
		for l in difflib.Differ().compare (*ss):
			if l[0] == '?':
				# difflib's intraline hint lines are of no use here.
				continue
			if l[0] == ' ':
				# Token common to both sides: close any open span first.
				for i in range(2):
					if st[i]:
						oo[i] += self.formatter.end_color ()
						st[i] = False
				oo = [o + self.formatter.escape (l[2:]) for o in oo]
				continue
			if l[0] in self.symbols:
				# Token present on one side only: emit it inside a span.
				i = self.symbols.index (l[0])
				if not st[i]:
					oo[i] += self.formatter.start_color (self.colors[i])
					st[i] = True
				oo[i] += self.formatter.escape (l[2:])
				continue
		for i in range(2):
			if st[i]:
				oo[i] += self.formatter.end_color ()
				st[i] = False
		# Drop the newlines that were only inserted to separate tokens.
		oo = [o.replace ('\n', '') for o in oo]
		return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]

	def colorize_diff (self, f):
		"""Yield the lines of diff f with differing line pairs colorized.

		Lines starting with one of the first two symbols are collected and
		flushed through colorize_lines() as soon as both sides are known,
		or when a side would be overwritten.  Every other line is passed
		through escaped; this includes lines starting with a later symbol
		and empty strings, both of which used to raise IndexError.
		"""
		lines = [None, None]
		for l in f:
			i = self.symbols.index (l[0]) if l and l[0] in self.symbols else -1
			if not 0 <= i < len (lines):
				yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
				continue
			if lines[i]:
				# This side is already occupied: flush before replacing it.
				for line in self.colorize_lines (lines):
					yield line
				lines = [None, None]
			lines[i] = l[1:]
			if (all (lines)):
				# Both sides collected: flush the pair.
				for line in self.colorize_lines (lines):
					yield line
				lines = [None, None]
		if (any (lines)):
			# Flush whatever is left at end of input.
			for line in self.colorize_lines (lines):
				yield line
208
209
class ZipDiffer:
	"""Compare files line by line, position by position ("zipped")."""

	@staticmethod
	def diff_files (files, symbols=diff_symbols):
		"""Write a line-by-line comparison of files to standard output.

		The files are read in lockstep.  When the current line is the same
		in all of them it is written once, prefixed with a space;
		otherwise each file's (non-empty) line is written prefixed with
		symbols[i], i being the position of the file.  A file that runs
		out of lines early contributes nothing from then on.

		An IOError other than a broken pipe is reported on standard error
		and ends the program with status 1; a broken pipe is ignored.
		"""
		# Python 2 calls this izip_longest, Python 3 zip_longest; the star
		# import at the top of the file only provides one of the two names.
		try:
			from itertools import izip_longest as zip_longest
		except ImportError:
			from itertools import zip_longest

		files = tuple (files) # in case it's a generator, copy it
		try:
			for lines in zip_longest (*files):
				if all (lines[0] == line for line in lines[1:]):
					sys.stdout.writelines ([" ", lines[0]])
					continue

				for i, l in enumerate (lines):
					if l:
						sys.stdout.writelines ([symbols[i], l])
		except IOError as e:
			if e.errno != errno.EPIPE:
				print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
				sys.exit (1)
228
229
class DiffFilters:
	"""Generators that filter a diff stream."""

	@staticmethod
	def filter_failures (f):
		"""Yield only the lines of the test cases in f that did not pass."""
		for _key, group in DiffHelpers.separate_test_cases (f):
			# Materialize the group: its lines are needed twice, once for
			# the verdict and once for the output.
			case_lines = list (group)
			if DiffHelpers.test_passed (case_lines):
				continue
			for line in case_lines:
				yield line
238
class Stat:
	"""Running tally: how many tests were added and the sum of their freq."""

	def __init__ (self):
		self.count = self.freq = 0

	def add (self, test):
		"""Count one more test and accumulate its 'freq' attribute."""
		self.count = self.count + 1
		self.freq = self.freq + test.freq
248
class Stats:
	"""Pass/fail bookkeeping for a set of tests, plus simple statistics.

	The statistics divide by the total count and therefore raise
	ZeroDivisionError while no test has been added.
	"""

	def __init__ (self):
		self.passed, self.failed, self.total = Stat (), Stat (), Stat ()

	def add (self, test):
		"""Record test in the total and in the passed or failed tally."""
		self.total.add (test)
		bucket = self.passed if test.passed else self.failed
		bucket.add (test)

	def mean (self):
		"""Return the fraction of tests that passed."""
		passed_count = float (self.passed.count)
		return passed_count / self.total.count

	def variance (self):
		"""Return pass fraction times fail fraction."""
		n = self.total.count
		pass_rate = float (self.passed.count) / n
		fail_rate = float (self.failed.count) / n
		return pass_rate * fail_rate

	def stddev (self):
		"""Return the square root of variance()."""
		return self.variance () ** .5

	def zscore (self, population):
		"""Calculate the standard score.
		   Population is the Stats for population.
		   Self is Stats for sample.
		   Returns larger absolute value if sample is highly unlikely to be random.
		   Anything outside of -3..+3 is very unlikely to be random.
		   See: http://en.wikipedia.org/wiki/Standard_score"""

		deviation = self.mean () - population.mean ()
		return deviation / population.stddev ()
282
283
284
285
class DiffSinks:
	"""Consumers that reduce a diff stream to a printed summary."""

	@staticmethod
	def print_stat (f):
		"""Print how many test cases in diff f passed and how many failed.

		Input without any test case reports a failure rate of 0% instead
		of raising ZeroDivisionError.
		"""
		passed = 0
		failed = 0
		# XXX port to Stats, but that would really slow us down here
		for key, lines in DiffHelpers.separate_test_cases (f):
			if DiffHelpers.test_passed (lines):
				passed += 1
			else:
				failed += 1
		total = passed + failed
		failed_percent = 100. * failed / total if total else 0.
		print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, failed_percent))
300
301
class Test:
	"""One test case, parsed from the diff lines that belong to it.

	Each line looks like "<symbol>[identifier: ]<open>payload<close>",
	optionally ending in a newline.  The bracket pair selects the
	attribute that receives the payload:

	  ( )  -> text      (stored as is)
	  < >  -> unicodes  (parsed with Unicode.parse)
	  [ ]  -> glyphs    (stored as is)

	'passed' turns False as soon as one line starts with anything other
	than a space.  'identifier' is taken from the first line that has one.
	'freq' is always 1.
	"""

	def __init__ (self, lines):
		self.freq = 1
		self.passed = True
		self.identifier = None
		self.text = None
		self.unicodes = None
		self.glyphs = None
		for l in lines:
			if not l:
				# Nothing to look at, not even a marker character.
				continue
			if l[0] != ' ':
				self.passed = False
			i = 1
			if ':' in l:
				i = l.index (':')
				if not self.identifier:
					self.identifier = l[1:i]
				i = i + 2 # Skip colon and space
			# Leave out one trailing newline, if there is one.  (The payload
			# used to be cut with a fixed -2, which dropped its last
			# character on lines that do not end in a newline.)
			end = len (l)
			if l[-1] == '\n':
				end -= 1
			bracketed = l[i:end]
			if len (bracketed) < 2:
				# Too short to hold a bracket pair (e.g. a blank line).
				continue
			brackets = bracketed[0] + bracketed[-1]
			payload = bracketed[1:-1]
			if brackets == '()':
				self.text = payload
			elif brackets == '<>':
				self.unicodes = Unicode.parse (payload)
			elif brackets == '[]':
				# XXX we don't handle failed tests here
				self.glyphs = payload
333
334
class DiffHelpers:
	"""Helpers for grouping and judging the lines of a diff."""

	@staticmethod
	def separate_test_cases (f):
		'''Reads lines from f, and if the lines have identifiers, ie.
		   have a colon character, groups them by identifier,
		   yielding lists of all lines with the same identifier.

		   The identifier is the text between the leading marker
		   character and the first colon after it; a line without such a
		   colon is its own key.  Only consecutive lines are grouped
		   (itertools.groupby).'''

		def identifier (l):
			# Search from index 1 so that the position found always agrees
			# with the "colon after the marker" condition.
			colon = l.find (':', 1)
			if colon < 0:
				return l
			return l[1:colon]
		return groupby (f, key=identifier)

	@staticmethod
	def test_passed (lines):
		"""Return True when the test case made of lines counts as passed.

		That is the case when every line starts with a space, or when any
		'+' line contains one of the substrings listed below.  Empty
		strings are accepted and count as lines that are not unchanged.
		"""
		lines = list (lines)
		# XXX This is a hack, but does the job for now.
		# NOTE(review): presumably these mark output that fell back to a
		# missing-glyph or dotted-circle rendering -- confirm the rationale.
		forgiven = (
			"space+0|space+0",
			"uni25CC",
			"dottedcircle",
			"glyph0",
			"gid0",
			"notdef",
		)
		added = [l for l in lines if l[:1] == '+']
		if any (marker in l for marker in forgiven for l in added):
			return True
		return all (l[:1] == ' ' for l in lines)
360
361
class FilterHelpers:
	"""Adapters that turn a filter generator into a printing function."""

	@staticmethod
	def filter_printer_function (filter_callback):
		"""Return a function printing each item filter_callback(f) yields.

		Items go through print(), so each one is followed by a newline.
		"""
		def printer (f):
			for item in filter_callback (f):
				print (item)
		return printer

	@staticmethod
	def filter_printer_function_no_newline (filter_callback):
		"""Return a function writing each item filter_callback(f) yields.

		Items are written to sys.stdout as they are, nothing is appended.
		"""
		def printer (f):
			for item in filter_callback (f):
				sys.stdout.write (item)
		return printer
377
378
class Ngram:
	"""Factory for sliding-window (n-gram) generators."""

	@staticmethod
	def generator (n):
		"""Return a generator function producing the n-grams of an iterable.

		The returned function yields each run of n consecutive items as a
		tuple, advancing one item at a time, and has n stored on it as the
		attribute 'n'.
		"""

		def gen (f):
			window = []
			for item in f:
				window.append (item)
				if len (window) != n:
					continue
				yield tuple (window)
				# Slide the window: forget the oldest item.
				del window[:1]

		gen.n = n
		return gen
394
395
class UtilMains:
	"""Ready-made main() bodies for the small command-line tools.

	All of them read their arguments from sys.argv, print a usage message
	and exit with status 1 when "--help" is given, and treat IOError the
	same way: a broken pipe is ignored, anything else is reported on
	standard error and ends the program with status 1.
	"""

	@staticmethod
	def _report_io_error (e):
		"""Shared IOError policy: ignore EPIPE, otherwise report and exit."""
		if e.errno == errno.EPIPE:
			return
		print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
		sys.exit (1)

	@staticmethod
	def process_multiple_files (callback, mnemonic = "FILE"):
		"""Call callback with an open file for every file named in argv.

		Without arguments, standard input ('-') is processed instead.
		"""

		if "--help" in sys.argv:
			print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
			sys.exit (1)

		try:
			names = sys.argv[1:] or ['-']
			for name in names:
				callback (FileHelpers.open_file_or_stdin (name))
		except IOError as e:
			UtilMains._report_io_error (e)

	@staticmethod
	def process_multiple_args (callback, mnemonic):
		"""Call callback with every command-line argument, in order.

		Arguments are mandatory: without any, the usage message is shown.
		"""

		if "--help" in sys.argv or len (sys.argv) == 1:
			print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
			sys.exit (1)

		try:
			for arg in sys.argv[1:]:
				callback (arg)
		except IOError as e:
			UtilMains._report_io_error (e)

	@staticmethod
	def filter_multiple_strings_or_stdin (callback, mnemonic, \
					      separator = " ", \
					      concat_separator = False):
		"""Print callback(x) for the arguments, or for each line of stdin.

		With arguments, the results are joined with separator and printed
		on one line; if concat_separator is given, the arguments are first
		joined with it into a single string.  Without arguments, standard
		input is read line by line (one trailing newline removed) and one
		result is printed per line.
		"""

		if "--help" in sys.argv:
			print ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
			      % (sys.argv[0], mnemonic, sys.argv[0]))
			sys.exit (1)

		try:
			if len (sys.argv) == 1:
				# readline() returns '' only at end of input.
				for line in iter (sys.stdin.readline, ''):
					if line.endswith ('\n'):
						line = line[:-1]
					print (callback (line))
			else:
				args = sys.argv[1:]
				if concat_separator != False:
					args = [concat_separator.join (args)]
				results = (callback (x) for x in args)
				print (separator.join (results))
		except IOError as e:
			UtilMains._report_io_error (e)
457
458
class Unicode:
	"""Conversions between text, code point lists and character names."""

	@staticmethod
	def decode (s):
		"""Return the code points of s as "U+XXXX,U+XXXX,...".

		s that is not already text is decoded as UTF-8 first.
		"""
		return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))

	@staticmethod
	def parse (s):
		"""Return the list of integers for the hexadecimal numbers in s.

		"0x"/"0X" prefixes, the punctuation < + > { } , ; & # and
		backslash, the letters x X u U n N i I, newlines and tabs are all
		treated as separators.  Any other non-hexadecimal text makes int()
		raise ValueError.
		"""
		s = re.sub (r"0[xX]", " ", s)
		s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
		return [int (x, 16) for x in s.split ()]

	@staticmethod
	def encode (s):
		"""Return the string made of the code points written in s.

		s is read with parse().  On Python 2 the result is UTF-8 encoded.
		"""
		s = u''.join (unichr (x) for x in Unicode.parse (s))
		if sys.version_info[0] == 2: s = s.encode ('utf-8')
		return s

	# Abbreviations substituted for full character names by pretty_name().
	shorthands = {
		"ZERO WIDTH NON-JOINER": "ZWNJ",
		"ZERO WIDTH JOINER": "ZWJ",
		"NARROW NO-BREAK SPACE": "NNBSP",
		"COMBINING GRAPHEME JOINER": "CGJ",
		"LEFT-TO-RIGHT MARK": "LRM",
		"RIGHT-TO-LEFT MARK": "RLM",
		"LEFT-TO-RIGHT EMBEDDING": "LRE",
		"RIGHT-TO-LEFT EMBEDDING": "RLE",
		"POP DIRECTIONAL FORMATTING": "PDF",
		"LEFT-TO-RIGHT OVERRIDE": "LRO",
		"RIGHT-TO-LEFT OVERRIDE": "RLO",
	}

	@staticmethod
	def pretty_name (u):
		"""Return a short name for character u, or "XXX" if it has none.

		The Unicode name is shortened by removing everything up to and
		including " LETTER ", " SIGN " or " COMBINING ", by turning
		"... VOWEL SIGN X" into "X-MATRA", and by applying 'shorthands'.
		"""
		try:
			s = unicodedata.name (u)
		except ValueError:
			return "XXX"
		s = re.sub (".* LETTER ", "", s)
		s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
		s = re.sub (".* SIGN ", "", s)
		s = re.sub (".* COMBINING ", "", s)
		# NOTE(review): names of the form "<SCRIPT> SIGN VIRAMA" have already
		# been reduced to "VIRAMA" at this point and do not match the
		# pattern below (it needs a preceding space) -- confirm that this
		# is intended.
		if re.match (".* VIRAMA", s):
			s = "HALANT"
		if s in Unicode.shorthands:
			s = Unicode.shorthands[s]
		return s

	@staticmethod
	def pretty_names (s):
		"""Return the pretty names of the code points in s, " + "-joined.

		s holds hexadecimal code points separated by commas, spaces or
		newlines, optionally decorated with < > + \\ u U or "0x".  As in
		encode(), the result is UTF-8 encoded on Python 2 only; encoding
		unconditionally produced bytes on Python 3.
		"""
		s = re.sub (r"[<+>\\uU]", " ", s)
		s = re.sub (r"0[xX]", " ", s)
		chars = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
		names = u' + '.join (Unicode.pretty_name (x) for x in chars)
		if sys.version_info[0] == 2: names = names.encode ('utf-8')
		return names
513
514
class FileHelpers:
	"""Small file-related conveniences."""

	@staticmethod
	def open_file_or_stdin (f):
		"""Return sys.stdin when f is '-', else the file f opened for reading."""
		return sys.stdin if f == '-' else open (f)
522
523
class Manifest:
	"""Reading and generating the MANIFEST files that list a directory."""

	@staticmethod
	def read (s, strict = True):
		"""Yield the normalized paths of all files reachable from s.

		A path that is not a directory is yielded itself.  For a directory
		its MANIFEST file is read and every entry is expanded recursively,
		relative to that directory.  Blank MANIFEST lines are ignored.

		strict -- when true, a missing path or an unreadable MANIFEST is
		          reported on standard error and ends the program with
		          status 1; when false, it is silently skipped.  The
		          setting also applies to nested entries.
		"""

		if not os.path.exists (s):
			if strict:
				print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
				sys.exit (1)
			return

		s = os.path.normpath (s)

		if not os.path.isdir (s):
			yield s
			return

		manifest = os.path.join (s, "MANIFEST")
		try:
			# Read everything up front so the file is closed again before
			# descending into the entries.
			with open (manifest) as m:
				items = [x.strip () for x in m]
		except IOError:
			if strict:
				print ("%s: %s does not exist" % (sys.argv[0], manifest), file=sys.stderr)
				sys.exit (1)
			return

		for f in items:
			if not f:
				# Joining '' to s gives s again: following a blank line
				# would re-read this same MANIFEST without end.
				continue
			for p in Manifest.read (os.path.join (s, f), strict):
				yield p

	@staticmethod
	def update_recursive (s):
		"""(Re)write the MANIFEST file of s and of every directory below it.

		Each MANIFEST lists the sorted file names followed by the sorted
		subdirectory names of its directory, one per line.  Entries named
		MANIFEST, README, LICENSE, COPYING, AUTHORS, SOURCES or ChangeLog
		are left out, and subdirectories with such a name are not visited.
		A "  GEN    <path>" line is printed once per MANIFEST written.
		"""

		skipped = ("MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog")

		for dirpath, dirnames, filenames in os.walk (s, followlinks=True):

			# dirnames is edited in place on purpose: os.walk (top-down)
			# only descends into what is left in it, in the order left.
			for f in skipped:
				if f in dirnames:
					dirnames.remove (f)
				if f in filenames:
					filenames.remove (f)
			dirnames.sort ()
			filenames.sort ()
			ms = os.path.join (dirpath, "MANIFEST")
			print ("  GEN    %s" % ms)
			with open (ms, "w") as m:
				for f in filenames:
					print (f, file=m)
				for f in dirnames:
					print (f, file=m)
			# No explicit recursion here: os.walk visits the subdirectories
			# itself, and recursing as well regenerated them repeatedly.
574
# This file only provides helpers for other scripts; running it directly
# does nothing.
if __name__ == '__main__':
	pass
577