1#!/usr/bin/python
2# coding=UTF-8
3#
4# Copyright 2014 Google Inc. All rights reserved.
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#     http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""Create a curated subset of NotoSansSymbols for Android."""
19
20__author__ = 'roozbeh@google.com (Roozbeh Pournader)'
21
22import contextlib
23import os
24import sys
25import tempfile
26
27from fontTools import ttLib
28from pathlib import PurePosixPath
29
30from nototools import subset
31from nototools import unicode_data
32
# Unicode blocks that we want to include in the font.
# One block per line, formatted as "FIRST..LAST; Block Name" with hexadecimal
# code points; main() feeds this text to unicode_data._parse_code_ranges().
BLOCKS_TO_INCLUDE = """
20D0..20FF; Combining Diacritical Marks for Symbols
2100..214F; Letterlike Symbols
2190..21FF; Arrows
2200..22FF; Mathematical Operators
2300..23FF; Miscellaneous Technical
2400..243F; Control Pictures
2440..245F; Optical Character Recognition
2460..24FF; Enclosed Alphanumerics
2500..257F; Box Drawing
2580..259F; Block Elements
25A0..25FF; Geometric Shapes
2600..26FF; Miscellaneous Symbols
2700..27BF; Dingbats
27C0..27EF; Miscellaneous Mathematical Symbols-A
27F0..27FF; Supplemental Arrows-A
2800..28FF; Braille Patterns
2900..297F; Supplemental Arrows-B
2980..29FF; Miscellaneous Mathematical Symbols-B
2A00..2AFF; Supplemental Mathematical Operators
2B00..2BFF; Miscellaneous Symbols and Arrows
4DC0..4DFF; Yijing Hexagram Symbols
10140..1018F; Ancient Greek Numbers
10190..101CF; Ancient Symbols
101D0..101FF; Phaistos Disc
1D000..1D0FF; Byzantine Musical Symbols
1D100..1D1FF; Musical Symbols
1D200..1D24F; Ancient Greek Musical Notation
1D300..1D35F; Tai Xuan Jing Symbols
1D360..1D37F; Counting Rod Numerals
1D400..1D7FF; Mathematical Alphanumeric Symbols
1F000..1F02F; Mahjong Tiles
1F030..1F09F; Domino Tiles
1F0A0..1F0FF; Playing Cards
1F700..1F77F; Alchemical Symbols
"""
70
# One-off characters to be included. At the moment, these are the Bitcoin
# sign (since it's not supported in Roboto yet) and the Japanese TV symbols
# of Unicode 9.
ONE_OFF_ADDITIONS = {
    0x20BF,  # ₿ BITCOIN SIGN
    0x1F19B,  # 🆛 SQUARED THREE D
    0x1F19C,  # 🆜 SQUARED SECOND SCREEN
    0x1F19D,  # 🆝 SQUARED TWO K
    0x1F19E,  # 🆞 SQUARED FOUR K
    0x1F19F,  # 🆟 SQUARED EIGHT K
    0x1F1A0,  # 🆠 SQUARED FIVE POINT ONE
    0x1F1A1,  # 🆡 SQUARED SEVEN POINT ONE
    0x1F1A2,  # 🆢 SQUARED TWENTY-TWO POINT TWO
    0x1F1A3,  # 🆣 SQUARED SIXTY P
    0x1F1A4,  # 🆤 SQUARED ONE HUNDRED TWENTY P
    0x1F1A5,  # 🆥 SQUARED LATIN SMALL LETTER D
    0x1F1A6,  # 🆦 SQUARED HC
    0x1F1A7,  # 🆧 SQUARED HDR
    0x1F1A8,  # 🆨 SQUARED HI-RES
    0x1F1A9,  # 🆩 SQUARED LOSSLESS
    0x1F1AA,  # 🆪 SQUARED SHV
    0x1F1AB,  # 🆫 SQUARED UHD
    0x1F1AC,  # 🆬 SQUARED VOD
    0x1F23B,  # 🈻 SQUARED CJK UNIFIED IDEOGRAPH-914D
}
96
# Letter-based characters, provided by Roboto. main() removes these from the
# subset so that they keep coming from Roboto.
LETTERLIKE_CHARS_IN_ROBOTO = {
    0x2100,  # ℀ ACCOUNT OF
    0x2101,  # ℁ ADDRESSED TO THE SUBJECT
    0x2103,  # ℃ DEGREE CELSIUS
    0x2105,  # ℅ CARE OF
    0x2106,  # ℆ CADA UNA
    0x2109,  # ℉ DEGREE FAHRENHEIT
    0x2113,  # ℓ SCRIPT SMALL L
    0x2116,  # № NUMERO SIGN
    0x2117,  # ℗ SOUND RECORDING COPYRIGHT
    0x211E,  # ℞ PRESCRIPTION TAKE
    0x211F,  # ℟ RESPONSE
    0x2120,  # ℠ SERVICE MARK
    0x2121,  # ℡ TELEPHONE SIGN
    0x2122,  # ™ TRADE MARK SIGN
    0x2123,  # ℣ VERSICLE
    0x2125,  # ℥ OUNCE SIGN
    0x2126,  # Ω OHM SIGN
    0x212A,  # K KELVIN SIGN
    0x212B,  # Å ANGSTROM SIGN
    0x212E,  # ℮ ESTIMATED SYMBOL
    0x2132,  # Ⅎ TURNED CAPITAL F
    0x213B,  # ℻ FACSIMILE SIGN
    0x214D,  # ⅍ AKTIESELSKAB
    0x214F,  # ⅏ SYMBOL FOR SAMARITAN SOURCE
}
124
# Characters that are supposed to default to emoji presentation, as reported
# by nototools' unicode_data; main() keeps them out of the symbols subset.
DEFAULT_EMOJI = unicode_data.get_presentation_default_emoji()
126
# Data file read by get_android_emoji(); it lives two directories above this
# script, under unicode/additions/.
EMOJI_ADDITIONS_FILE = os.path.join(
    os.path.dirname(__file__), os.path.pardir, os.path.pardir,
    'unicode', 'additions', 'emoji-data.txt')
130
131
# Characters we have decided we are doing as emoji-style in Android,
# despite UTR#51's recommendation
def get_android_emoji():
    """Return additional Android default emojis.

    Reads EMOJI_ADDITIONS_FILE and collects every code point whose property
    is 'Emoji_Presentation'.

    Returns:
        A set of integer code points.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale's default encoding.
    with open(EMOJI_ADDITIONS_FILE, encoding='utf-8') as emoji_additions:
        contents = emoji_additions.read()

    # NOTE(review): this relies on a private nototools helper; it is iterated
    # here as (hex code point string, property name) pairs.
    data = unicode_data._parse_semicolon_separated_data(contents)
    return {
        int(codepoint, 16)
        for codepoint, prop in data
        if prop == 'Emoji_Presentation'
    }
144
def rename_postscript_name(source_font, target_font, new_name):
    """Save a copy of a font with its PostScript name replaced.

    Only the name record with nameID 6 (PostScript name) for platform 3
    (Windows), encoding 1 (Unicode BMP) and language 0x0409 (US English) is
    rewritten; every other name record is left untouched.

    Args:
        source_font: Path of the font file to read.
        target_font: Path the modified font is saved to.
        new_name: The new PostScript name.
    """
    # closing() guarantees the font's underlying file is released even if
    # saving fails.
    with contextlib.closing(ttLib.TTFont(source_font)) as ttf:
        for record in ttf['name'].names:
            if (record.nameID == 6 and record.platformID == 3
                    and record.platEncID == 1 and record.langID == 0x0409):
                record.string = new_name
        ttf.save(target_font)
154
def _write_subset(source_file_name, tmp_file_name, target_file_name, coverage):
    """Subset the source font to `coverage` and save it as the target file.

    The subset is first written to `tmp_file_name`, then re-saved under
    `target_file_name` with the target's base name as its PostScript name.
    """
    subset.subset_font(
        source_file_name,
        tmp_file_name,
        include=coverage)

    # Use given file name as the PostScript name.
    post_script_name = PurePosixPath(target_file_name).stem
    rename_postscript_name(tmp_file_name, target_file_name, post_script_name)


def main(argv):
    """Subset the Noto Symbols font.

    Two fonts are written: the curated symbols subset, and a second subset
    holding the characters that default to emoji presentation (which are
    excluded from the first one).

    Args:
        argv: Command-line style argument list. argv[1] is the source file
            name, argv[2] is the target file name for the symbols subset,
            and argv[3] is the target file name for the emoji subset.

    Raises:
        SystemExit: If fewer than three file names are given. This is checked
            before any output is written.
    """
    if len(argv) < 4:
        program = argv[0] if argv else '<script>'
        raise SystemExit(
            'usage: %s SOURCE_FONT TARGET_FONT EMOJI_TARGET_FONT' % program)
    source_file_name, target_file_name, second_subset_file_name = argv[1:4]

    target_coverage = set()
    # Add all characters in BLOCKS_TO_INCLUDE
    for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE):
        target_coverage.update(range(first, last + 1))

    # Add one-off characters
    target_coverage |= ONE_OFF_ADDITIONS
    # Remove characters preferably coming from Roboto
    target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO
    # Remove characters that are supposed to default to emoji; they make up
    # the second subset instead.
    emoji_coverage = DEFAULT_EMOJI | get_android_emoji()
    target_coverage -= emoji_coverage

    # Remove dentistry symbols, as their main use appears to be for CJK:
    # http://www.unicode.org/L2/L2000/00098-n2195.pdf
    target_coverage.difference_update(range(0x23BE, 0x23CC + 1))

    # Remove COMBINING ENCLOSING KEYCAP. It's needed for Android's color emoji
    # mechanism to work properly. discard() rather than remove(): the
    # character may already be gone if it was listed among the emoji above.
    target_coverage.discard(0x20E3)

    # The temporary file is only scratch space shared by both subsetting
    # passes; the context manager makes sure it is closed and deleted.
    with tempfile.NamedTemporaryFile() as tmp:
        _write_subset(
            source_file_name, tmp.name, target_file_name, target_coverage)
        _write_subset(
            source_file_name, tmp.name, second_subset_file_name,
            emoji_coverage)


if __name__ == '__main__':
    main(sys.argv)
209