1# -*- coding: utf-8 -*-
2from fontTools.misc.py23 import bytechr, byteord, bytesjoin, strjoin, tobytes, tostr
3from fontTools.misc import sstruct
4from fontTools.misc.textTools import safeEval
5from fontTools.misc.encodingTools import getEncoding
6from fontTools.ttLib import newTable
7from . import DefaultTable
8import struct
9import logging
10
11
# Module-level logger used for all diagnostics emitted while reading/writing the table.
log = logging.getLogger(__name__)

# sstruct layout of one name record: six 'H' fields, big endian.
# 'length' and 'offset' locate the record's string inside the string storage
# area that follows the record array (see table__n_a_m_e.decompile/compile).
nameRecordFormat = """
		>	# big endian
		platformID:	H
		platEncID:	H
		langID:		H
		nameID:		H
		length:		H
		offset:		H
"""

# Size of one packed name record, as computed by sstruct from the format above.
nameRecordSize = sstruct.calcsize(nameRecordFormat)
25
26
27class table__n_a_m_e(DefaultTable.DefaultTable):
28	dependencies = ["ltag"]
29
30	def decompile(self, data, ttFont):
31		format, n, stringOffset = struct.unpack(b">HHH", data[:6])
32		expectedStringOffset = 6 + n * nameRecordSize
33		if stringOffset != expectedStringOffset:
34			log.error(
35				"'name' table stringOffset incorrect. Expected: %s; Actual: %s",
36				expectedStringOffset, stringOffset)
37		stringData = data[stringOffset:]
38		data = data[6:]
39		self.names = []
40		for i in range(n):
41			if len(data) < 12:
42				log.error('skipping malformed name record #%d', i)
43				continue
44			name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord())
45			name.string = stringData[name.offset:name.offset+name.length]
46			if name.offset + name.length > len(stringData):
47				log.error('skipping malformed name record #%d', i)
48				continue
49			assert len(name.string) == name.length
50			#if (name.platEncID, name.platformID) in ((0, 0), (1, 3)):
51			#	if len(name.string) % 2:
52			#		print "2-byte string doesn't have even length!"
53			#		print name.__dict__
54			del name.offset, name.length
55			self.names.append(name)
56
57	def compile(self, ttFont):
58		if not hasattr(self, "names"):
59			# only happens when there are NO name table entries read
60			# from the TTX file
61			self.names = []
62		names = self.names
63		names.sort() # sort according to the spec; see NameRecord.__lt__()
64		stringData = b""
65		format = 0
66		n = len(names)
67		stringOffset = 6 + n * sstruct.calcsize(nameRecordFormat)
68		data = struct.pack(b">HHH", format, n, stringOffset)
69		lastoffset = 0
70		done = {}  # remember the data so we can reuse the "pointers"
71		for name in names:
72			string = name.toBytes()
73			if string in done:
74				name.offset, name.length = done[string]
75			else:
76				name.offset, name.length = done[string] = len(stringData), len(string)
77				stringData = bytesjoin([stringData, string])
78			data = data + sstruct.pack(nameRecordFormat, name)
79		return data + stringData
80
81	def toXML(self, writer, ttFont):
82		for name in self.names:
83			name.toXML(writer, ttFont)
84
85	def fromXML(self, name, attrs, content, ttFont):
86		if name != "namerecord":
87			return # ignore unknown tags
88		if not hasattr(self, "names"):
89			self.names = []
90		name = NameRecord()
91		self.names.append(name)
92		name.fromXML(name, attrs, content, ttFont)
93
94	def getName(self, nameID, platformID, platEncID, langID=None):
95		for namerecord in self.names:
96			if (	namerecord.nameID == nameID and
97					namerecord.platformID == platformID and
98					namerecord.platEncID == platEncID):
99				if langID is None or namerecord.langID == langID:
100					return namerecord
101		return None # not found
102
103	def getDebugName(self, nameID):
104		englishName = someName = None
105		for name in self.names:
106			if name.nameID != nameID:
107				continue
108			try:
109				unistr = name.toUnicode()
110			except UnicodeDecodeError:
111				continue
112
113			someName = unistr
114			if (name.platformID, name.langID) in ((1, 0), (3, 0x409)):
115				englishName = unistr
116				break
117		if englishName:
118			return englishName
119		elif someName:
120			return someName
121		else:
122			return None
123
124	def setName(self, string, nameID, platformID, platEncID, langID):
125		""" Set the 'string' for the name record identified by 'nameID', 'platformID',
126		'platEncID' and 'langID'. If a record with that nameID doesn't exist, create it
127		and append to the name table.
128
129		'string' can be of type `str` (`unicode` in PY2) or `bytes`. In the latter case,
130		it is assumed to be already encoded with the correct plaform-specific encoding
131		identified by the (platformID, platEncID, langID) triplet. A warning is issued
132		to prevent unexpected results.
133		"""
134		if not hasattr(self, 'names'):
135			self.names = []
136		if not isinstance(string, str):
137			if isinstance(string, bytes):
138				log.warning(
139					"name string is bytes, ensure it's correctly encoded: %r", string)
140			else:
141				raise TypeError(
142					"expected unicode or bytes, found %s: %r" % (
143						type(string).__name__, string))
144		namerecord = self.getName(nameID, platformID, platEncID, langID)
145		if namerecord:
146			namerecord.string = string
147		else:
148			self.names.append(makeName(string, nameID, platformID, platEncID, langID))
149
150	def removeNames(self, nameID=None, platformID=None, platEncID=None, langID=None):
151		"""Remove any name records identified by the given combination of 'nameID',
152		'platformID', 'platEncID' and 'langID'.
153		"""
154		args = {
155			argName: argValue
156			for argName, argValue in (
157				("nameID", nameID),
158				("platformID", platformID),
159				("platEncID", platEncID),
160				("langID", langID),
161			)
162			if argValue is not None
163		}
164		if not args:
165			# no arguments, nothing to do
166			return
167		self.names = [
168			rec for rec in self.names
169			if any(
170				argValue != getattr(rec, argName)
171				for argName, argValue in args.items()
172			)
173		]
174
175	def _findUnusedNameID(self, minNameID=256):
176		"""Finds an unused name id.
177
178		The nameID is assigned in the range between 'minNameID' and 32767 (inclusive),
179		following the last nameID in the name table.
180		"""
181		names = getattr(self, 'names', [])
182		nameID = 1 + max([n.nameID for n in names] + [minNameID - 1])
183		if nameID > 32767:
184			raise ValueError("nameID must be less than 32768")
185		return nameID
186
187	def findMultilingualName(self, names, windows=True, mac=True, minNameID=0):
188		"""Return the name ID of an existing multilingual name that
189		matches the 'names' dictionary, or None if not found.
190
191		'names' is a dictionary with the name in multiple languages,
192		such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
193		The keys can be arbitrary IETF BCP 47 language codes;
194		the values are Unicode strings.
195
196		If 'windows' is True, the returned name ID is guaranteed
197		exist for all requested languages for platformID=3 and
198		platEncID=1.
199		If 'mac' is True, the returned name ID is guaranteed to exist
200		for all requested languages for platformID=1 and platEncID=0.
201
202		The returned name ID will not be less than the 'minNameID'
203		argument.
204		"""
205		# Gather the set of requested
206		#   (string, platformID, platEncID, langID)
207		# tuples
208		reqNameSet = set()
209		for lang, name in sorted(names.items()):
210			if windows:
211				windowsName = _makeWindowsName(name, None, lang)
212				if windowsName is not None:
213					reqNameSet.add((windowsName.string,
214					                windowsName.platformID,
215					                windowsName.platEncID,
216					                windowsName.langID))
217			if mac:
218				macName = _makeMacName(name, None, lang)
219				if macName is not None:
220					reqNameSet.add((macName.string,
221				                    macName.platformID,
222				                    macName.platEncID,
223				                    macName.langID))
224
225		# Collect matching name IDs
226		matchingNames = dict()
227		for name in self.names:
228			try:
229				key = (name.toUnicode(), name.platformID,
230				       name.platEncID, name.langID)
231			except UnicodeDecodeError:
232				continue
233			if key in reqNameSet and name.nameID >= minNameID:
234				nameSet = matchingNames.setdefault(name.nameID, set())
235				nameSet.add(key)
236
237		# Return the first name ID that defines all requested strings
238		for nameID, nameSet in sorted(matchingNames.items()):
239			if nameSet == reqNameSet:
240				return nameID
241
242		return None  # not found
243
244	def addMultilingualName(self, names, ttFont=None, nameID=None,
245	                        windows=True, mac=True, minNameID=0):
246		"""Add a multilingual name, returning its name ID
247
248		'names' is a dictionary with the name in multiple languages,
249		such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
250		The keys can be arbitrary IETF BCP 47 language codes;
251		the values are Unicode strings.
252
253		'ttFont' is the TTFont to which the names are added, or None.
254		If present, the font's 'ltag' table can get populated
255		to store exotic language codes, which allows encoding
256		names that otherwise cannot get encoded at all.
257
258		'nameID' is the name ID to be used, or None to let the library
259		find an existing set of name records that match, or pick an
260		unused name ID.
261
262		If 'windows' is True, a platformID=3 name record will be added.
263		If 'mac' is True, a platformID=1 name record will be added.
264
265		If the 'nameID' argument is None, the created nameID will not
266		be less than the 'minNameID' argument.
267		"""
268		if not hasattr(self, 'names'):
269			self.names = []
270		if nameID is None:
271			# Reuse nameID if possible
272			nameID = self.findMultilingualName(
273				names, windows=windows, mac=mac, minNameID=minNameID)
274			if nameID is not None:
275				return nameID
276			nameID = self._findUnusedNameID()
277		# TODO: Should minimize BCP 47 language codes.
278		# https://github.com/fonttools/fonttools/issues/930
279		for lang, name in sorted(names.items()):
280			if windows:
281				windowsName = _makeWindowsName(name, nameID, lang)
282				if windowsName is not None:
283					self.names.append(windowsName)
284				else:
285					# We cannot not make a Windows name: make sure we add a
286					# Mac name as a fallback. This can happen for exotic
287					# BCP47 language tags that have no Windows language code.
288					mac = True
289			if mac:
290				macName = _makeMacName(name, nameID, lang, ttFont)
291				if macName is not None:
292					self.names.append(macName)
293		return nameID
294
295	def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255):
296		""" Add a new name record containing 'string' for each (platformID, platEncID,
297		langID) tuple specified in the 'platforms' list.
298
299		The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive),
300		following the last nameID in the name table.
301		If no 'platforms' are specified, two English name records are added, one for the
302		Macintosh (platformID=0), and one for the Windows platform (3).
303
304		The 'string' must be a Unicode string, so it can be encoded with different,
305		platform-specific encodings.
306
307		Return the new nameID.
308		"""
309		assert len(platforms) > 0, \
310			"'platforms' must contain at least one (platformID, platEncID, langID) tuple"
311		if not hasattr(self, 'names'):
312			self.names = []
313		if not isinstance(string, str):
314			raise TypeError(
315				"expected str, found %s: %r" % (type(string).__name__, string))
316		nameID = self._findUnusedNameID(minNameID + 1)
317		for platformID, platEncID, langID in platforms:
318			self.names.append(makeName(string, nameID, platformID, platEncID, langID))
319		return nameID
320
321
def makeName(string, nameID, platformID, platEncID, langID):
	"""Create a NameRecord and fill in its string and the four identifying IDs."""
	record = NameRecord()
	record.string = string
	record.nameID = nameID
	record.platformID = platformID
	record.platEncID = platEncID
	record.langID = langID
	return record
327
328
def _makeWindowsName(name, nameID, language):
	"""Create a NameRecord for the Microsoft Windows platform

	'language' is an arbitrary IETF BCP 47 language identifier such
	as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'; it is looked up
	case-insensitively. If there is no Windows language ID for it,
	a warning is logged and the result is None. Otherwise the record
	uses platformID=3 and platEncID=1.
	Future versions of fonttools might return a NameRecord for the
	OpenType 'name' table format 1, but this is not implemented yet.
	"""
	windowsLangID = _WINDOWS_LANGUAGE_CODES.get(language.lower())
	if windowsLangID is None:
		log.warning("cannot add Windows name in language %s "
		            "because fonttools does not yet support "
		            "name table format 1" % language)
		return None
	return makeName(name, nameID, 3, 1, windowsLangID)
346
347
def _makeMacName(name, nameID, language, font=None):
	"""Create a NameRecord for Apple platforms

	'language' is an arbitrary IETF BCP 47 language identifier such
	as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we
	create a Macintosh NameRecord that is understood by old applications
	(platform ID 1 and an old-style Macintosh language enum). If this
	is not possible, we create a Unicode NameRecord (platform ID 0)
	whose language points to the font’s 'ltag' table. The latter
	can encode any string in any language, but legacy applications
	might not recognize the format (in which case they will ignore
	those names).

	'font' should be the TTFont for which you want to create a name.
	If 'font' is None, we only return NameRecords for legacy Macintosh;
	in that case a warning is logged and the result is None for names
	that need to be encoded with an 'ltag' table.

	See the section “The language identifier” in Apple’s specification:
	https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
	"""
	macLang = _MAC_LANGUAGE_CODES.get(language.lower())
	macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang)
	if macLang is not None and macScript is not None:
		encoding = getEncoding(1, macScript, macLang, default="ascii")
		# Check if we can actually encode this name. If we can't,
		# for example because we have no support for the legacy
		# encoding (LookupError), or because the name string contains
		# Unicode characters that the legacy encoding cannot represent
		# (UnicodeEncodeError), we fall back to encoding the name in
		# Unicode and put the language tag into the ltag table.
		try:
			tobytes(name, encoding, errors="strict")
		except (UnicodeEncodeError, LookupError):
			pass
		else:
			return makeName(name, nameID, 1, macScript, macLang)
	if font is not None:
		ltag = font.tables.get("ltag")
		if ltag is None:
			ltag = font["ltag"] = newTable("ltag")
		# 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)”
		# “The preferred platform-specific code for Unicode would be 3 or 4.”
		# https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
		return makeName(name, nameID, 0, 4, ltag.addTag(language))
	else:
		log.warning("cannot store language %s into 'ltag' table "
		            "without having access to the TTFont object" %
		            language)
		return None
397
398
399class NameRecord(object):
400
401	def getEncoding(self, default='ascii'):
402		"""Returns the Python encoding name for this name entry based on its platformID,
403		platEncID, and langID.  If encoding for these values is not known, by default
404		'ascii' is returned.  That can be overriden by passing a value to the default
405		argument.
406		"""
407		return getEncoding(self.platformID, self.platEncID, self.langID, default)
408
409	def encodingIsUnicodeCompatible(self):
410		return self.getEncoding(None) in ['utf_16_be', 'ucs2be', 'ascii', 'latin1']
411
412	def __str__(self):
413		return self.toStr(errors='backslashreplace')
414
415	def isUnicode(self):
416		return (self.platformID == 0 or
417			(self.platformID == 3 and self.platEncID in [0, 1, 10]))
418
	def toUnicode(self, errors='strict'):
		"""
		If self.string is a Unicode string, return it; otherwise try decoding the
		bytes in self.string to a Unicode string using the encoding of this
		entry as returned by self.getEncoding(); Note that self.getEncoding()
		returns 'ascii' if the encoding is unknown to the library.

		Certain heuristics are performed to recover data from bytes that are
		ill-formed in the chosen encoding, or that otherwise look misencoded
		(mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE
		but marked otherwise).  If the bytes are ill-formed and the heuristics fail,
		the error is handled according to the errors parameter to this function, which is
		passed to the underlying decode() function; by default it throws a
		UnicodeDecodeError exception.

		Note: The mentioned heuristics mean that roundtripping a font to XML and back
		to binary might recover some misencoded data whereas just loading the font
		and saving it back will not change them.
		"""
		# Local helper: printable ASCII (0x20..0x7E) plus TAB, LF and CR.
		def isascii(b):
			return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D]
		encoding = self.getEncoding()
		string = self.string

		if isinstance(string, bytes) and encoding == 'utf_16_be' and len(string) % 2 == 1:
			# Recover badly encoded UTF-16 strings that have an odd number of bytes:
			# - If the last byte is zero, drop it.  Otherwise,
			# - If all the odd bytes are zero and all the even bytes are ASCII,
			#   prepend one zero byte.  Otherwise,
			# - If first byte is zero and all other bytes are ASCII, insert zero
			#   bytes between consecutive ASCII bytes.
			#
			# (Yes, I've seen all of these in the wild... sigh)
			# The branches are tried in this order; if none applies the bytes
			# are left untouched and decoding below reports the problem
			# according to 'errors'.
			if byteord(string[-1]) == 0:
				string = string[:-1]
			elif all(byteord(b) == 0 if i % 2 else isascii(byteord(b)) for i,b in enumerate(string)):
				string = b'\0' + string
			elif byteord(string[0]) == 0 and all(isascii(byteord(b)) for b in string[1:]):
				string = bytesjoin(b'\0'+bytechr(byteord(b)) for b in string[1:])

		string = tostr(string, encoding=encoding, errors=errors)

		# If decoded strings still looks like UTF-16BE, it suggests a double-encoding.
		# Fix it up.
		# (Every even-indexed character is NUL and every odd-indexed one is ASCII;
		# an empty string trivially satisfies this and stays empty.)
		if all(ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i,c in enumerate(string)):
			# If string claims to be Mac encoding, but looks like UTF-16BE with ASCII text,
			# narrow it down.
			string = ''.join(c for c in string[1::2])

		return string
469
470	def toBytes(self, errors='strict'):
471		""" If self.string is a bytes object, return it; otherwise try encoding
472		the Unicode string in self.string to bytes using the encoding of this
473		entry as returned by self.getEncoding(); Note that self.getEncoding()
474		returns 'ascii' if the encoding is unknown to the library.
475
476		If the Unicode string cannot be encoded to bytes in the chosen encoding,
477		the error is handled according to the errors parameter to this function,
478		which is passed to the underlying encode() function; by default it throws a
479		UnicodeEncodeError exception.
480		"""
481		return tobytes(self.string, encoding=self.getEncoding(), errors=errors)
482
483	toStr = toUnicode
484
485	def toXML(self, writer, ttFont):
486		try:
487			unistr = self.toUnicode()
488		except UnicodeDecodeError:
489			unistr = None
490		attrs = [
491				("nameID", self.nameID),
492				("platformID", self.platformID),
493				("platEncID", self.platEncID),
494				("langID", hex(self.langID)),
495			]
496
497		if unistr is None or not self.encodingIsUnicodeCompatible():
498			attrs.append(("unicode", unistr is not None))
499
500		writer.begintag("namerecord", attrs)
501		writer.newline()
502		if unistr is not None:
503			writer.write(unistr)
504		else:
505			writer.write8bit(self.string)
506		writer.newline()
507		writer.endtag("namerecord")
508		writer.newline()
509
510	def fromXML(self, name, attrs, content, ttFont):
511		self.nameID = safeEval(attrs["nameID"])
512		self.platformID = safeEval(attrs["platformID"])
513		self.platEncID = safeEval(attrs["platEncID"])
514		self.langID =  safeEval(attrs["langID"])
515		s = strjoin(content).strip()
516		encoding = self.getEncoding()
517		if self.encodingIsUnicodeCompatible() or safeEval(attrs.get("unicode", "False")):
518			self.string = s.encode(encoding)
519		else:
520			# This is the inverse of write8bit...
521			self.string = s.encode("latin1")
522
523	def __lt__(self, other):
524		if type(self) != type(other):
525			return NotImplemented
526
527		try:
528			# implemented so that list.sort() sorts according to the spec.
529			selfTuple = (
530				self.platformID,
531				self.platEncID,
532				self.langID,
533				self.nameID,
534				self.toBytes(),
535			)
536			otherTuple = (
537				other.platformID,
538				other.platEncID,
539				other.langID,
540				other.nameID,
541				other.toBytes(),
542			)
543			return selfTuple < otherTuple
544		except (UnicodeEncodeError, AttributeError):
545			# This can only happen for
546			# 1) an object that is not a NameRecord, or
547			# 2) an unlikely incomplete NameRecord object which has not been
548			#    fully populated, or
549			# 3) when all IDs are identical but the strings can't be encoded
550			#    for their platform encoding.
551			# In all cases it is best to return NotImplemented.
552			return NotImplemented
553
554	def __repr__(self):
555		return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % (
556				self.nameID, self.platformID, self.langID)
557
558
# Windows language ID → IETF BCP-47 language tag
#
# While Microsoft indicates a region/country for all its language
# IDs, we follow Unicode practice by omitting “most likely subtags”
# as per Unicode CLDR. For example, English is simply “en” and not
# “en-Latn” because according to Unicode, the default script
# for English is Latin.
#
# Several IDs may map to the same tag (see the note on Spanish below).
#
# http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
# http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
_WINDOWS_LANGUAGES = {
    0x0436: 'af',
    0x041C: 'sq',
    0x0484: 'gsw',
    0x045E: 'am',
    0x1401: 'ar-DZ',
    0x3C01: 'ar-BH',
    0x0C01: 'ar',
    0x0801: 'ar-IQ',
    0x2C01: 'ar-JO',
    0x3401: 'ar-KW',
    0x3001: 'ar-LB',
    0x1001: 'ar-LY',
    0x1801: 'ary',
    0x2001: 'ar-OM',
    0x4001: 'ar-QA',
    0x0401: 'ar-SA',
    0x2801: 'ar-SY',
    0x1C01: 'aeb',
    0x3801: 'ar-AE',
    0x2401: 'ar-YE',
    0x042B: 'hy',
    0x044D: 'as',
    0x082C: 'az-Cyrl',
    0x042C: 'az',
    0x046D: 'ba',
    0x042D: 'eu',
    0x0423: 'be',
    0x0845: 'bn',
    0x0445: 'bn-IN',
    0x201A: 'bs-Cyrl',
    0x141A: 'bs',
    0x047E: 'br',
    0x0402: 'bg',
    0x0403: 'ca',
    0x0C04: 'zh-HK',
    0x1404: 'zh-MO',
    0x0804: 'zh',
    0x1004: 'zh-SG',
    0x0404: 'zh-TW',
    0x0483: 'co',
    0x041A: 'hr',
    0x101A: 'hr-BA',
    0x0405: 'cs',
    0x0406: 'da',
    0x048C: 'prs',
    0x0465: 'dv',
    0x0813: 'nl-BE',
    0x0413: 'nl',
    0x0C09: 'en-AU',
    0x2809: 'en-BZ',
    0x1009: 'en-CA',
    0x2409: 'en-029',
    0x4009: 'en-IN',
    0x1809: 'en-IE',
    0x2009: 'en-JM',
    0x4409: 'en-MY',
    0x1409: 'en-NZ',
    0x3409: 'en-PH',
    0x4809: 'en-SG',
    0x1C09: 'en-ZA',
    0x2C09: 'en-TT',
    0x0809: 'en-GB',
    0x0409: 'en',
    0x3009: 'en-ZW',
    0x0425: 'et',
    0x0438: 'fo',
    0x0464: 'fil',
    0x040B: 'fi',
    0x080C: 'fr-BE',
    0x0C0C: 'fr-CA',
    0x040C: 'fr',
    0x140C: 'fr-LU',
    0x180C: 'fr-MC',
    0x100C: 'fr-CH',
    0x0462: 'fy',
    0x0456: 'gl',
    0x0437: 'ka',
    0x0C07: 'de-AT',
    0x0407: 'de',
    0x1407: 'de-LI',
    0x1007: 'de-LU',
    0x0807: 'de-CH',
    0x0408: 'el',
    0x046F: 'kl',
    0x0447: 'gu',
    0x0468: 'ha',
    0x040D: 'he',
    0x0439: 'hi',
    0x040E: 'hu',
    0x040F: 'is',
    0x0470: 'ig',
    0x0421: 'id',
    0x045D: 'iu',
    0x085D: 'iu-Latn',
    0x083C: 'ga',
    0x0434: 'xh',
    0x0435: 'zu',
    0x0410: 'it',
    0x0810: 'it-CH',
    0x0411: 'ja',
    0x044B: 'kn',
    0x043F: 'kk',
    0x0453: 'km',
    0x0486: 'quc',
    0x0487: 'rw',
    0x0441: 'sw',
    0x0457: 'kok',
    0x0412: 'ko',
    0x0440: 'ky',
    0x0454: 'lo',
    0x0426: 'lv',
    0x0427: 'lt',
    0x082E: 'dsb',
    0x046E: 'lb',
    0x042F: 'mk',
    0x083E: 'ms-BN',
    0x043E: 'ms',
    0x044C: 'ml',
    0x043A: 'mt',
    0x0481: 'mi',
    0x047A: 'arn',
    0x044E: 'mr',
    0x047C: 'moh',
    0x0450: 'mn',
    0x0850: 'mn-CN',
    0x0461: 'ne',
    0x0414: 'nb',
    0x0814: 'nn',
    0x0482: 'oc',
    0x0448: 'or',
    0x0463: 'ps',
    0x0415: 'pl',
    0x0416: 'pt',
    0x0816: 'pt-PT',
    0x0446: 'pa',
    0x046B: 'qu-BO',
    0x086B: 'qu-EC',
    0x0C6B: 'qu',
    0x0418: 'ro',
    0x0417: 'rm',
    0x0419: 'ru',
    0x243B: 'smn',
    0x103B: 'smj-NO',
    0x143B: 'smj',
    0x0C3B: 'se-FI',
    0x043B: 'se',
    0x083B: 'se-SE',
    0x203B: 'sms',
    0x183B: 'sma-NO',
    # Southern Sami (Sweden); 'sms' (Skolt Sami) belongs to 0x203B only.
    0x1C3B: 'sma',
    0x044F: 'sa',
    0x1C1A: 'sr-Cyrl-BA',
    0x0C1A: 'sr',
    0x181A: 'sr-Latn-BA',
    0x081A: 'sr-Latn',
    0x046C: 'nso',
    0x0432: 'tn',
    0x045B: 'si',
    0x041B: 'sk',
    0x0424: 'sl',
    0x2C0A: 'es-AR',
    0x400A: 'es-BO',
    0x340A: 'es-CL',
    0x240A: 'es-CO',
    0x140A: 'es-CR',
    0x1C0A: 'es-DO',
    0x300A: 'es-EC',
    0x440A: 'es-SV',
    0x100A: 'es-GT',
    0x480A: 'es-HN',
    0x080A: 'es-MX',
    0x4C0A: 'es-NI',
    0x180A: 'es-PA',
    0x3C0A: 'es-PY',
    0x280A: 'es-PE',
    0x500A: 'es-PR',

    # Microsoft has defined two different language codes for
    # “Spanish with modern sorting” and “Spanish with traditional
    # sorting”. This makes sense for collation APIs, and it would be
    # possible to express this in BCP 47 language tags via Unicode
    # extensions (eg., “es-u-co-trad” is “Spanish with traditional
    # sorting”). However, for storing names in fonts, this distinction
    # does not make sense, so we use “es” in both cases.
    0x0C0A: 'es',
    0x040A: 'es',

    0x540A: 'es-US',
    0x380A: 'es-UY',
    0x200A: 'es-VE',
    0x081D: 'sv-FI',
    0x041D: 'sv',
    0x045A: 'syr',
    0x0428: 'tg',
    0x085F: 'tzm',
    0x0449: 'ta',
    0x0444: 'tt',
    0x044A: 'te',
    0x041E: 'th',
    0x0451: 'bo',
    0x041F: 'tr',
    0x0442: 'tk',
    0x0480: 'ug',
    0x0422: 'uk',
    0x042E: 'hsb',
    0x0420: 'ur',
    0x0843: 'uz-Cyrl',
    0x0443: 'uz',
    0x042A: 'vi',
    0x0452: 'cy',
    0x0488: 'wo',
    0x0485: 'sah',
    0x0478: 'ii',
    0x046A: 'yo',
}
785
786
# Macintosh language ID → IETF BCP-47 language tag
#
# The IDs follow the same numbering as _MAC_LANGUAGE_TO_SCRIPT, whose
# comments name each language (e.g. 27 is langEstonian, 38 is langCzech,
# 60 is langKurdish and 61 is langKashmiri).
_MAC_LANGUAGES = {
    0: 'en',
    1: 'fr',
    2: 'de',
    3: 'it',
    4: 'nl',
    5: 'sv',
    6: 'es',
    7: 'da',
    8: 'pt',
    9: 'no',
    10: 'he',
    11: 'ja',
    12: 'ar',
    13: 'fi',
    14: 'el',
    15: 'is',
    16: 'mt',
    17: 'tr',
    18: 'hr',
    19: 'zh-Hant',
    20: 'ur',
    21: 'hi',
    22: 'th',
    23: 'ko',
    24: 'lt',
    25: 'pl',
    26: 'hu',
    27: 'et',  # Estonian; must not reuse 'es', or Spanish would resolve to this ID
    28: 'lv',
    29: 'se',
    30: 'fo',
    31: 'fa',
    32: 'ru',
    33: 'zh',
    34: 'nl-BE',
    35: 'ga',
    36: 'sq',
    37: 'ro',
    38: 'cs',  # Czech
    39: 'sk',
    40: 'sl',
    41: 'yi',
    42: 'sr',
    43: 'mk',
    44: 'bg',
    45: 'uk',
    46: 'be',
    47: 'uz',
    48: 'kk',
    49: 'az-Cyrl',
    50: 'az-Arab',
    51: 'hy',
    52: 'ka',
    53: 'mo',
    54: 'ky',
    55: 'tg',
    56: 'tk',
    57: 'mn-CN',
    58: 'mn',
    59: 'ps',
    60: 'ku',  # Kurdish
    61: 'ks',  # Kashmiri
    62: 'sd',
    63: 'bo',
    64: 'ne',
    65: 'sa',
    66: 'mr',
    67: 'bn',
    68: 'as',
    69: 'gu',
    70: 'pa',
    71: 'or',
    72: 'ml',
    73: 'kn',
    74: 'ta',
    75: 'te',
    76: 'si',
    77: 'my',
    78: 'km',
    79: 'lo',
    80: 'vi',
    81: 'id',
    82: 'tl',
    83: 'ms',
    84: 'ms-Arab',
    85: 'am',
    86: 'ti',
    87: 'om',
    88: 'so',
    89: 'sw',
    90: 'rw',
    91: 'rn',
    92: 'ny',
    93: 'mg',
    94: 'eo',
    128: 'cy',
    129: 'eu',
    130: 'ca',
    131: 'la',
    132: 'qu',
    133: 'gn',
    134: 'ay',
    135: 'tt',
    136: 'ug',
    137: 'dz',
    138: 'jv',
    139: 'su',
    140: 'gl',
    141: 'af',
    142: 'br',
    143: 'iu',
    144: 'gd',
    145: 'gv',
    146: 'ga',
    147: 'to',
    148: 'el-polyton',
    149: 'kl',
    150: 'az',
    151: 'nn',
}
908
909
# Reverse lookup tables: lower-cased IETF BCP 47 language tag → platform language ID.
# A tag can be listed for more than one ID (e.g. 'es' in _WINDOWS_LANGUAGES);
# the comprehension overwrites earlier keys, so the ID listed last for a tag
# is the one these tables return.
_WINDOWS_LANGUAGE_CODES = {lang.lower(): code for code, lang in _WINDOWS_LANGUAGES.items()}
_MAC_LANGUAGE_CODES = {lang.lower(): code for code, lang in _MAC_LANGUAGES.items()}
912
913
# MacOS language ID → MacOS script ID
#
# Note that the script ID is not sufficient to determine what encoding
# to use in TrueType files. For some languages, MacOS used a modification
# of a mainstream script. For example, an Icelandic name would be stored
# with smRoman in the TrueType naming table, but the actual encoding
# is a special Icelandic version of the normal Macintosh Roman encoding.
# As another example, Inuktitut uses an 8-bit encoding for Canadian Aboriginal
# Syllabics but MacOS had run out of available script codes, so this was
# done as a (pretty radical) “modification” of Ethiopic.
#
# http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
_MAC_LANGUAGE_TO_SCRIPT = {
    0: 0,  # langEnglish → smRoman
    1: 0,  # langFrench → smRoman
    2: 0,  # langGerman → smRoman
    3: 0,  # langItalian → smRoman
    4: 0,  # langDutch → smRoman
    5: 0,  # langSwedish → smRoman
    6: 0,  # langSpanish → smRoman
    7: 0,  # langDanish → smRoman
    8: 0,  # langPortuguese → smRoman
    9: 0,  # langNorwegian → smRoman
    10: 5,  # langHebrew → smHebrew
    11: 1,  # langJapanese → smJapanese
    12: 4,  # langArabic → smArabic
    13: 0,  # langFinnish → smRoman
    14: 6,  # langGreek → smGreek
    15: 0,  # langIcelandic → smRoman (modified)
    16: 0,  # langMaltese → smRoman
    17: 0,  # langTurkish → smRoman (modified)
    18: 0,  # langCroatian → smRoman (modified)
    19: 2,  # langTradChinese → smTradChinese
    20: 4,  # langUrdu → smArabic
    21: 9,  # langHindi → smDevanagari
    22: 21,  # langThai → smThai
    23: 3,  # langKorean → smKorean
    24: 29,  # langLithuanian → smCentralEuroRoman
    25: 29,  # langPolish → smCentralEuroRoman
    26: 29,  # langHungarian → smCentralEuroRoman
    27: 29,  # langEstonian → smCentralEuroRoman
    28: 29,  # langLatvian → smCentralEuroRoman
    29: 0,  # langSami → smRoman
    30: 0,  # langFaroese → smRoman (modified)
    31: 4,  # langFarsi → smArabic (modified)
    32: 7,  # langRussian → smCyrillic
    33: 25,  # langSimpChinese → smSimpChinese
    34: 0,  # langFlemish → smRoman
    35: 0,  # langIrishGaelic → smRoman (modified)
    36: 0,  # langAlbanian → smRoman
    37: 0,  # langRomanian → smRoman (modified)
    38: 29,  # langCzech → smCentralEuroRoman
    39: 29,  # langSlovak → smCentralEuroRoman
    40: 0,  # langSlovenian → smRoman (modified)
    41: 5,  # langYiddish → smHebrew
    42: 7,  # langSerbian → smCyrillic
    43: 7,  # langMacedonian → smCyrillic
    44: 7,  # langBulgarian → smCyrillic
    45: 7,  # langUkrainian → smCyrillic (modified)
    46: 7,  # langByelorussian → smCyrillic
    47: 7,  # langUzbek → smCyrillic
    48: 7,  # langKazakh → smCyrillic
    49: 7,  # langAzerbaijani → smCyrillic
    50: 4,  # langAzerbaijanAr → smArabic
    51: 24,  # langArmenian → smArmenian
    52: 23,  # langGeorgian → smGeorgian
    53: 7,  # langMoldavian → smCyrillic
    54: 7,  # langKirghiz → smCyrillic
    55: 7,  # langTajiki → smCyrillic
    56: 7,  # langTurkmen → smCyrillic
    57: 27,  # langMongolian → smMongolian
    58: 7,  # langMongolianCyr → smCyrillic
    59: 4,  # langPashto → smArabic
    60: 4,  # langKurdish → smArabic
    61: 4,  # langKashmiri → smArabic
    62: 4,  # langSindhi → smArabic
    63: 26,  # langTibetan → smTibetan
    64: 9,  # langNepali → smDevanagari
    65: 9,  # langSanskrit → smDevanagari
    66: 9,  # langMarathi → smDevanagari
    67: 13,  # langBengali → smBengali
    68: 13,  # langAssamese → smBengali
    69: 11,  # langGujarati → smGujarati
    70: 10,  # langPunjabi → smGurmukhi
    71: 12,  # langOriya → smOriya
    72: 17,  # langMalayalam → smMalayalam
    73: 16,  # langKannada → smKannada
    74: 14,  # langTamil → smTamil
    75: 15,  # langTelugu → smTelugu
    76: 18,  # langSinhalese → smSinhalese
    77: 19,  # langBurmese → smBurmese
    78: 20,  # langKhmer → smKhmer
    79: 22,  # langLao → smLao
    80: 30,  # langVietnamese → smVietnamese
    81: 0,  # langIndonesian → smRoman
    82: 0,  # langTagalog → smRoman
    83: 0,  # langMalayRoman → smRoman
    84: 4,  # langMalayArabic → smArabic
    85: 28,  # langAmharic → smEthiopic
    86: 28,  # langTigrinya → smEthiopic
    87: 28,  # langOromo → smEthiopic
    88: 0,  # langSomali → smRoman
    89: 0,  # langSwahili → smRoman
    90: 0,  # langKinyarwanda → smRoman
    91: 0,  # langRundi → smRoman
    92: 0,  # langNyanja → smRoman
    93: 0,  # langMalagasy → smRoman
    94: 0,  # langEsperanto → smRoman
    128: 0,  # langWelsh → smRoman (modified)
    129: 0,  # langBasque → smRoman
    130: 0,  # langCatalan → smRoman
    131: 0,  # langLatin → smRoman
    132: 0,  # langQuechua → smRoman
    133: 0,  # langGuarani → smRoman
    134: 0,  # langAymara → smRoman
    135: 7,  # langTatar → smCyrillic
    136: 4,  # langUighur → smArabic
    137: 26,  # langDzongkha → smTibetan
    138: 0,  # langJavaneseRom → smRoman
    139: 0,  # langSundaneseRom → smRoman
    140: 0,  # langGalician → smRoman
    141: 0,  # langAfrikaans → smRoman
    142: 0,  # langBreton → smRoman (modified)
    143: 28,  # langInuktitut → smEthiopic (modified)
    144: 0,  # langScottishGaelic → smRoman (modified)
    145: 0,  # langManxGaelic → smRoman (modified)
    146: 0,  # langIrishGaelicScript → smRoman (modified)
    147: 0,  # langTongan → smRoman
    148: 6,  # langGreekAncient → smGreek
    149: 0,  # langGreenlandic → smRoman
    150: 0,  # langAzerbaijanRoman → smRoman
    151: 0,   # langNynorsk → smRoman
}
1047