1#!/usr/bin/python
2# coding=UTF-8
3#
4# Copyright 2016 Google Inc. All rights reserved.
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#     http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17
18"""Create a curated subset of Noto CJK for Android."""
19
20import os
21
22from fontTools import ttLib
23from nototools import font_data
24from nototools import tool_utils
25from nototools import ttc_utils
26
# Code points covered by the Noto CJK fonts for which UTR #51 recommends
# emoji-style presentation by default. They are part of EXCLUDED_CODEPOINTS
# and are therefore removed from the cmap of each subsetted font.
EMOJI_IN_CJK = {
    0x26BD,  # ⚽ SOCCER BALL
    0x26BE,  # ⚾ BASEBALL
    0x1F18E,  # 🆎 NEGATIVE SQUARED AB
    0x1F191,  # 🆑 SQUARED CL
    0x1F192,  # 🆒 SQUARED COOL
    0x1F193,  # 🆓 SQUARED FREE
    0x1F194,  # 🆔 SQUARED ID
    0x1F195,  # 🆕 SQUARED NEW
    0x1F196,  # 🆖 SQUARED NG
    0x1F197,  # 🆗 SQUARED OK
    0x1F198,  # 🆘 SQUARED SOS
    0x1F199,  # 🆙 SQUARED UP WITH EXCLAMATION MARK
    0x1F19A,  # 🆚 SQUARED VS
    0x1F201,  # 🈁 SQUARED KATAKANA KOKO
    0x1F21A,  # 🈚 SQUARED CJK UNIFIED IDEOGRAPH-7121
    0x1F22F,  # 🈯 SQUARED CJK UNIFIED IDEOGRAPH-6307
    0x1F232,  # 🈲 SQUARED CJK UNIFIED IDEOGRAPH-7981
    0x1F233,  # 🈳 SQUARED CJK UNIFIED IDEOGRAPH-7A7A
    0x1F234,  # 🈴 SQUARED CJK UNIFIED IDEOGRAPH-5408
    0x1F235,  # 🈵 SQUARED CJK UNIFIED IDEOGRAPH-6E80
    0x1F236,  # 🈶 SQUARED CJK UNIFIED IDEOGRAPH-6709
    0x1F238,  # 🈸 SQUARED CJK UNIFIED IDEOGRAPH-7533
    0x1F239,  # 🈹 SQUARED CJK UNIFIED IDEOGRAPH-5272
    0x1F23A,  # 🈺 SQUARED CJK UNIFIED IDEOGRAPH-55B6
    0x1F250,  # 🉐 CIRCLED IDEOGRAPH ADVANTAGE
    0x1F251,  # 🉑 CIRCLED IDEOGRAPH ACCEPT
}
57
# Code points that Android renders emoji-style by our own decision, even
# though UTR #51 does not recommend emoji presentation for them by default.
ANDROID_EMOJI = {
    0x2600,  # ☀ BLACK SUN WITH RAYS
    0x2601,  # ☁ CLOUD
    0x260E,  # ☎ BLACK TELEPHONE
    0x261D,  # ☝ WHITE UP POINTING INDEX
    0x263A,  # ☺ WHITE SMILING FACE
    0x2660,  # ♠ BLACK SPADE SUIT
    0x2663,  # ♣ BLACK CLUB SUIT
    0x2665,  # ♥ BLACK HEART SUIT
    0x2666,  # ♦ BLACK DIAMOND SUIT
    0x270C,  # ✌ VICTORY HAND
    0x2744,  # ❄ SNOWFLAKE
    0x2764,  # ❤ HEAVY BLACK HEART
}
74
# We don't want support for ASCII control chars (the range U+0000-U+001F).
# NOTE(review): parse_int_ranges presumably returns a set of ints, since the
# result is unioned with the sets below -- confirm against tool_utils.
CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F')

# Every code point to drop from the fonts' cmaps, as a sorted list.
EXCLUDED_CODEPOINTS = sorted(EMOJI_IN_CJK | ANDROID_EMOJI | CONTROL_CHARS)
79
80
def remove_from_cmap(infile, outfile, exclude=frozenset()):
    """Removes a set of characters from a font file's cmap table.

    Args:
      infile: Path of the font file to load.
      outfile: Path the modified font is saved to. The caller in this file
        passes the same path as infile, overwriting the font in place.
      exclude: Code points to delete from the cmap. Defaults to an empty
        set, i.e. nothing is passed for removal.
    """
    font = ttLib.TTFont(infile)
    try:
        font_data.delete_from_cmap(font, exclude)
        font.save(outfile)
    finally:
        # Release the file handle held by the font's reader, even when
        # editing or saving fails.
        font.close()
86
87
# Directory the member fonts of each TTC are extracted into and processed in.
TEMP_DIR = 'subsetted'

def remove_codepoints_from_ttc(ttc_name):
    """Rebuilds a TTC with EXCLUDED_CODEPOINTS removed from every member font.

    The fonts contained in ttc_name are extracted into TEMP_DIR, each one has
    the excluded code points deleted from its cmap in place, and a collection
    named ttc_name is then built from them while the working directory is
    TEMP_DIR. Finally the extracted member fonts are deleted.

    NOTE(review): because the rebuild runs inside TEMP_DIR, the output TTC
    presumably lands in TEMP_DIR rather than overwriting the input -- confirm
    against ttc_utils.ttcfile_build.

    Args:
      ttc_name: File name of the TrueType/OpenType collection to process.
    """
    otf_names = ttc_utils.ttcfile_extract(ttc_name, TEMP_DIR)

    with tool_utils.temp_chdir(TEMP_DIR):
        for otf_name in otf_names:
            # Parenthesised single-argument form prints identically on
            # Python 2 and is valid syntax on Python 3.
            print('Subsetting %s...' % otf_name)
            remove_from_cmap(otf_name, otf_name, exclude=EXCLUDED_CODEPOINTS)
        ttc_utils.ttcfile_build(ttc_name, otf_names)
        # The extracted member fonts are only intermediates; remove them.
        for otf_name in otf_names:
            os.remove(otf_name)
100
101
# Process the Sans collection first, then the Serif one.
for _ttc_file in ('NotoSansCJK-Regular.ttc', 'NotoSerifCJK-Regular.ttc'):
    remove_codepoints_from_ttc(_ttc_file)
104