1#!/usr/bin/python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""
18Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
19under a meta tag with name 'Emji'.
20
In order to create the final font, the following are used as inputs:
22
23- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
24external/noto-fonts/emoji/NotoColorEmoji.ttf
25
26- Unicode files: Unicode files that are in the framework, and lists information about all the
27emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
28and emoji-variation-sequences.txt. Currently at external/unicode/.
29
30- additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are
31in the Android font. Resides in framework and currently under external/unicode/.
32
33- data/emoji_metadata.txt: The file that includes the id, codepoints, the first
34Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font
35version that the emoji was added (compatAdded). Updated when the script is executed.
36
37- data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.
38
After execution, the following files are created if they do not exist; otherwise they are updated:
- font/NotoColorEmojiCompat.ttf
- supported-emojis/emojis.txt
- data/emoji_metadata.txt
- src/java/androidx/text/emoji/flatbuffer/*
44"""
45
46from __future__ import print_function
47
48import contextlib
49import csv
50import hashlib
51import itertools
52import json
53import os
54import shutil
55import sys
56import tempfile
57from fontTools import ttLib
58
########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
# Last Android SDK version. Every _EmojiData starts with this value as its sdk_added field.
SDK_VERSION = 28
# Metadata version that will be embedded into the font. If there are updates to the font that
# would cause data/emoji_metadata.txt to change, this integer should be incremented. This number
# defines in which EmojiCompat metadata version the emoji is added to the font.
METADATA_VERSION = 3

####### main directories where output files are created #######
# All output directories are resolved relative to the directory that contains this script.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
JAVA_SRC_DIR = os.path.join(SCRIPT_DIR, 'src', 'java')
####### output files #######
# generated font file
FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
# emoji metadata output file; written as space separated values (id, sdkAdded, compatAdded,
# codepoints), not as json
OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
# emojis test file: one supported emoji (space separated hex codepoints) per line
TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
####### input files #######
# Unicode file names to read emoji data from; resolved against the unicode directory argument
EMOJI_DATA_FILE = 'emoji-data.txt'
EMOJI_SEQ_FILE = 'emoji-sequences.txt'
EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
# Android OS emoji files for emojis that are not in Unicode files; both are read as optional
ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
# Android OS emoji style override file. Codepoints that are rendered with emoji style by default
# even though not defined so in <code>emoji-data.txt</code>.
EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
# emoji metadata file of the previous run; the same file that this run overwrites
INPUT_META_FILE = OUTPUT_META_FILE
# flatbuffer schema
FLATBUFFER_SCHEMA = os.path.join(DATA_DIR, 'emoji_metadata.fbs')
# file path for java header, it will be prepended to flatbuffer java files
FLATBUFFER_HEADER = os.path.join(DATA_DIR, "flatbuffer_header.txt")
# name of the temporary emoji metadata json file that is fed to flatc
OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
# name of the temporary binary file generated by flatbuffer
FLATBUFFER_BIN = 'emoji_metadata.bin'
# directory representation for flatbuffer java package; the trailing '' component makes the
# path end with a separator
FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
# path, relative to the temporary directory, that contains the generated flatbuffer java files
FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
# directory under source where flatbuffer java files will be copied into
FLATBUFFER_JAVA_TARGET = os.path.join(JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
# meta tag name used in the font to embed the emoji metadata. This value is also used in
# MetadataListReader.java in order to locate the metadata location.
EMOJI_META_TAG_NAME = 'Emji'

# Emoji property names as they appear after read_emoji_lines upper-cases each line.
EMOJI_STR = 'EMOJI'
EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR]
# Marker text of the emoji-style entries in the variation sequences file.
STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'

# First emoji id that is handed out when there is no previous metadata to continue from.
DEFAULT_EMOJI_ID = 0xF0001
# Emoji style variation selector (FE0F); stripped from sequences before they are used as keys.
EMOJI_STYLE_VS = 0xFE0F
121
def to_hex_str(value):
    """Return the uppercase hexadecimal text of an int value, without the 0x prefix."""
    return '{0:X}'.format(value)
125
def hex_str_to_int(string):
    """Parse a hexadecimal string (upper or lower case) and return its int value."""
    hex_base = 16
    return int(string, hex_base)
129
def codepoint_to_string(codepoints):
    """Join an iterable of int codepoints into one string of space separated hex values."""
    hex_values = map(to_hex_str, codepoints)
    return ' '.join(hex_values)
133
def prepend_header_to_file(file_path):
    """Replaces everything before the class declaration of a java file with the header.

    Used to update flatbuffer java files with header, comments and annotations. The content of
    FLATBUFFER_HEADER, followed by a newline, replaces all text that precedes the first
    occurrence of "public final class" in the file.

    :param file_path: path of the java file that is rewritten in place
    :raises ValueError: if the file does not contain "public final class"
    """
    with open(FLATBUFFER_HEADER, "r") as copyright_file:
        header = copyright_file.read()

    with open(file_path, "r+") as original_file:
        original_content = original_file.read()
        start_index = original_content.index("public final class")
        original_file.seek(0)
        original_file.write(header + "\n" + original_content[start_index:])
        # The new content can be shorter than the old one (the header may be shorter than the
        # text it replaces); cut the file here so no stale bytes are left at the end.
        original_file.truncate()
143
144
def update_flatbuffer_java_files(flatbuffer_java_dir):
    """Prepends headers to flatbuffer java files and copies them to the final destination.

    :param flatbuffer_java_dir: directory that contains the generated MetadataList.java and
        MetadataItem.java files; may be given with or without a trailing path separator
    """
    # os.path.join yields the same paths as plain concatenation when the directory ends with a
    # separator, and still works when it does not.
    tmp_metadata_list = os.path.join(flatbuffer_java_dir, FLATBUFFER_METADATA_LIST_JAVA)
    tmp_metadata_item = os.path.join(flatbuffer_java_dir, FLATBUFFER_METADATA_ITEM_JAVA)
    prepend_header_to_file(tmp_metadata_list)
    prepend_header_to_file(tmp_metadata_item)

    if not os.path.exists(FLATBUFFER_JAVA_TARGET):
        os.makedirs(FLATBUFFER_JAVA_TARGET)

    shutil.copy(tmp_metadata_list,
                os.path.join(FLATBUFFER_JAVA_TARGET, FLATBUFFER_METADATA_LIST_JAVA))
    shutil.copy(tmp_metadata_item,
                os.path.join(FLATBUFFER_JAVA_TARGET, FLATBUFFER_METADATA_ITEM_JAVA))
157
def create_test_data(unicode_path):
    """Collects every emoji listed in the unicode files and rewrites the test file.

    The result is written to TEST_DATA_PATH, one emoji per line, sorted, each emoji being its
    codepoints as space separated uppercase hex values.

    :param unicode_path: directory that contains the unicode emoji files
    """
    def in_unicode_dir(file_name):
        return os.path.join(unicode_path, file_name)

    sequence_lines = read_emoji_lines(in_unicode_dir(EMOJI_ZWJ_FILE))
    sequence_lines.extend(read_emoji_lines(in_unicode_dir(EMOJI_SEQ_FILE)))

    # the Android specific additions are allowed to be missing
    sequence_lines.extend(
        read_emoji_lines(in_unicode_dir(ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True))
    sequence_lines.extend(
        read_emoji_lines(in_unicode_dir(ANDROID_EMOJIS_SEQ_FILE), optional=True))

    # the variation sequences file lists many sequences; keep only the emoji style (FE0F) ones
    variation_lines = read_emoji_lines(in_unicode_dir(EMOJI_VARIATION_SEQ_FILE))
    sequence_lines.extend(
        entry for entry in variation_lines if STD_VARIANTS_EMOJI_STYLE in entry)

    supported = set()
    for entry in sequence_lines:
        sequence_field = entry.split(';')[0].strip()
        sequence = [hex_str_to_int(token) for token in sequence_field.split(' ')]
        supported.add(codepoint_to_string(sequence).upper())

    # single codepoints count only when they have emoji presentation by default
    for entry in read_emoji_lines(in_unicode_dir(EMOJI_DATA_FILE)):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(entry)
        if emoji_property not in ACCEPTED_EMOJI_PROPERTIES:
            continue
        if emoji_property != EMOJI_PRESENTATION_STR:
            continue
        supported.update(
            to_hex_str(codepoint) for codepoint in codepoints_for_emojirange(codepoints_range))

    # finally add the android default emoji exceptions
    supported.update(
        to_hex_str(codepoint) for codepoint in get_emoji_style_exceptions(unicode_path))

    ordered = sorted(supported)
    with open(TEST_DATA_PATH, "w") as test_file:
        for entry in ordered:
            test_file.write("%s\n" % entry)
199
class _EmojiData(object):
    """Value holder for one emoji: codepoints, style, id, image size and version fields."""

    def __init__(self, codepoints, is_emoji_style):
        """Creates an entry with no id, no image size and the current SDK/metadata versions.

        :param codepoints: list of int codepoints that make up the emoji
        :param is_emoji_style: True if the emoji is rendered with emoji style by default
        """
        self.codepoints = codepoints
        self.emoji_style = is_emoji_style
        # 0 means that no id has been assigned yet
        self.emoji_id = 0
        self.width = self.height = 0
        self.sdk_added = SDK_VERSION
        self.compat_added = METADATA_VERSION

    def update_metrics(self, metrics):
        """Copies the image size from the given metrics object.
        :param metrics: an object that exposes width and height attributes.
        """
        self.width, self.height = metrics.width, metrics.height

    def __repr__(self):
        codepoints_text = codepoint_to_string(self.codepoints)
        return '<EmojiData {0} - {1}>'.format(self.emoji_style, codepoints_text)

    def create_json_element(self):
        """Returns a dictionary with the fields of this emoji, ready to be dumped as json."""
        return {
            'id': self.emoji_id,
            'emojiStyle': self.emoji_style,
            'sdkAdded': self.sdk_added,
            'compatAdded': self.compat_added,
            'width': self.width,
            'height': self.height,
            'codepoints': self.codepoints,
        }

    def create_txt_row(self):
        """Returns the metadata file row: hex id, sdkAdded, compatAdded, then hex codepoints."""
        leading_columns = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
        return leading_columns + [to_hex_str(codepoint) for codepoint in self.codepoints]

    def update(self, emoji_id, sdk_added, compat_added):
        """Overwrites the id and the version fields with the given values."""
        self.emoji_id = emoji_id
        self.sdk_added = sdk_added
        self.compat_added = compat_added
246
247
def read_emoji_lines(file_path, optional=False):
    """Collect the meaningful lines of a unicode emoji file as uppercase strings.

    Lines are stripped of surrounding whitespace; empty lines and lines that start with '#' are
    skipped.
    :param file_path: unicode emoji file path
    :param optional: if True an IOError while opening/reading the file is not propagated and
        the lines collected so far (possibly none) are returned
    :return: list of uppercase strings
    """
    collected = []
    try:
        with open(file_path) as file_stream:
            for raw_line in file_stream:
                stripped = raw_line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                collected.append(stripped.upper())
    except IOError:
        if not optional:
            raise

    return collected
269
def get_emoji_style_exceptions(unicode_path):
    """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers.

    Each line uses the emoji-data.txt line format, so the first field is either a single
    codepoint or a range such as 1F93C..1F93E; ranges are expanded into all of their codepoints.

    :param unicode_path: directory that contains the unicode emoji files
    :return: list of int codepoints, in file order
    """
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE))
    exceptions = []
    for line in lines:
        codepoints_range = codepoints_and_emoji_prop(line)[0]
        # codepoints_for_emojirange returns a one element list for a single codepoint, so plain
        # entries produce exactly the same result as parsing them directly.
        exceptions.extend(codepoints_for_emojirange(codepoints_range))
    return exceptions
278
def codepoints_for_emojirange(codepoints_range):
    """Return the int codepoints described by the first field of an emoji file line.

    The field is either a single hex codepoint, or an inclusive range written as
    START..END, in which case every codepoint of the range is returned.
    """
    if '..' not in codepoints_range:
        return [hex_str_to_int(codepoints_range)]

    range_start, range_end = codepoints_range.split('..')
    first = hex_str_to_int(range_start)
    last = hex_str_to_int(range_end)
    return list(range(first, last + 1))
292
def codepoints_and_emoji_prop(line):
    """Split an emoji file line into its codepoints field and its emoji property field.

    Expected line format:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]

    Everything from the first # on is ignored; a line without # raises ValueError.
    :return: tuple of (codepoints or codepoint range string, emoji property string)
    """
    data_part, hash_sign, _ = line.strip().partition('#')
    if not hash_sign:
        raise ValueError("Line is expected to have # in it")

    fields = data_part.split(';')
    return fields[0].strip(), fields[1].strip()
307
def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
    """Fills emoji_data_map from a unicode emoji file whose lines describe codepoint intervals.

    Every interval is expanded and one EmojiData per codepoint is inserted into emoji_data_map.
    Lines whose property is not in ACCEPTED_EMOJI_PROPERTIES are skipped. The expected line
    format is:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]

    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :param file_path: path of the unicode emoji file
    :param emoji_style_exceptions: codepoints that are emoji style regardless of the file
    """
    for entry in read_emoji_lines(file_path):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(entry)
        if emoji_property in ACCEPTED_EMOJI_PROPERTIES:
            has_emoji_presentation = emoji_property == EMOJI_PRESENTATION_STR

            for codepoint in codepoints_for_emojirange(codepoints_range):
                key = codepoint_to_string([codepoint])
                emoji_style = has_emoji_presentation or codepoint in emoji_style_exceptions
                if key not in emoji_data_map:
                    emoji_data_map[key] = _EmojiData([codepoint], emoji_style)
                elif emoji_style:
                    # a codepoint can be defined on several lines; an existing entry is only
                    # ever switched to emoji style, never back
                    emoji_data_map[key].emoji_style = True
334
335
def read_emoji_sequences(emoji_data_map, file_path, optional=False):
    """Adds an EmojiData to emoji_data_map for each sequence line of the given file.

    The emoji style variation selector (EMOJI_STYLE_VS) is dropped from each sequence, and
    sequences whose key already exists in emoji_data_map are left untouched.

    :param optional: passed to read_emoji_lines; if True an unreadable file is not an error
    """
    # line format: 1F1E6 1F1E8 ; Name ; [...]
    for entry in read_emoji_lines(file_path, optional):
        sequence_field = entry.split(';')[0].strip()
        codepoints = []
        for token in sequence_field.split(' '):
            codepoint = hex_str_to_int(token)
            if codepoint != EMOJI_STYLE_VS:
                codepoints.append(codepoint)

        key = codepoint_to_string(codepoints)
        if key in emoji_data_map:
            continue
        emoji_data_map[key] = _EmojiData(codepoints, False)
348
349
def load_emoji_data_map(unicode_path):
    """Builds the map of space separated codepoints to EmojiData from the emoji data files.

    :param unicode_path: directory that contains the unicode emoji files
    :return: map of space separated codepoints to EmojiData
    """
    emoji_data_map = {}

    # single codepoint emojis first
    read_emoji_intervals(emoji_data_map,
                         os.path.join(unicode_path, EMOJI_DATA_FILE),
                         get_emoji_style_exceptions(unicode_path))

    # then the sequences; the Android specific files are only read if they exist
    sequence_files = (
        (EMOJI_ZWJ_FILE, False),
        (EMOJI_SEQ_FILE, False),
        (ANDROID_EMOJI_ZWJ_SEQ_FILE, True),
        (ANDROID_EMOJIS_SEQ_FILE, True),
    )
    for file_name, is_optional in sequence_files:
        read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, file_name),
                             optional=is_optional)

    return emoji_data_map
369
370
def load_previous_metadata(emoji_data_map):
    """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added
    fields in emoji_metadata.txt (INPUT_META_FILE), and returns the next emoji id to use.

    Rows of the file are space separated: hex id, sdkAdded, compatAdded, hex codepoints. Rows
    that start with '#' and blank lines are skipped. Only rows whose codepoints exist in
    emoji_data_map are applied, and only those rows are considered for the returned id.

    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :return: one greater than the largest applied emoji id, but never less than
        DEFAULT_EMOJI_ID. DEFAULT_EMOJI_ID is returned when the file does not exist or no row
        of it was applied.
    """
    current_emoji_id = DEFAULT_EMOJI_ID
    if not os.path.isfile(INPUT_META_FILE):
        return current_emoji_id

    with open(INPUT_META_FILE) as csvfile:
        reader = csv.reader(csvfile, delimiter=' ')
        for row in reader:
            # csv.reader yields an empty list for a blank line; indexing it would fail
            if not row or row[0].startswith('#'):
                continue
            emoji_id = hex_str_to_int(row[0])
            sdk_added = int(row[1])
            compat_added = int(row[2])
            key = codepoint_to_string(hex_str_to_int(x) for x in row[3:])
            if key not in emoji_data_map:
                continue
            emoji_data = emoji_data_map[key]
            emoji_data.update(emoji_id, sdk_added, compat_added)
            if emoji_data.emoji_id >= current_emoji_id:
                current_emoji_id = emoji_data.emoji_id + 1

    return current_emoji_id
394
395
def update_ttlib_orig_sort():
    """Replaces ttLib.sortedTagList with a wrapper that puts the meta table first.

    The wrapper delegates to the sort function that was installed at the time of this call and
    then moves the 'meta' tag, when present, to the front of the result.
    """
    orig_sort = ttLib.sortedTagList

    def meta_first_table_sort(tag_list, table_order=None):
        """Sorts the tables with the original ttLib sort, then makes the meta table first."""
        tag_list = orig_sort(tag_list, table_order)
        # list.remove raises ValueError for a missing element; tag lists without a meta table
        # are returned in the original order instead
        if 'meta' in tag_list:
            tag_list.remove('meta')
            tag_list.insert(0, 'meta')
        return tag_list

    ttLib.sortedTagList = meta_first_table_sort
408
409
def inject_meta_into_font(ttf, flatbuffer_bin_filename):
    """Injects the metadata binary into the meta table of the font.

    The meta table is created if the font does not have one. The content of the binary file is
    stored under the EMOJI_META_TAG_NAME tag, and the ttLib table sort is patched so that the
    meta table is placed first.

    :param ttf: font object that supports `in` and item access by table tag
    :param flatbuffer_bin_filename: path of the flatbuffer binary file to embed
    """
    if 'meta' not in ttf:
        ttf['meta'] = ttLib.getTableClass('meta')()
    meta = ttf['meta']
    # the flatbuffer output is binary data; text mode could translate or fail to decode bytes
    with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
        meta.data[EMOJI_META_TAG_NAME] = flatbuffer_bin_file.read()

    # sort meta tables for faster access
    update_ttlib_orig_sort()
420
421
def validate_input_files(font_path, unicode_path):
    """Validate the existence of the font file and of the required unicode files.

    All files that are later read as non-optional are checked here, so that a missing input is
    reported before any output is written.

    :param font_path: path to the source emoji font file
    :param unicode_path: path to the directory that contains the unicode emoji files
    :raises ValueError: if the font file, the unicode directory or one of the required unicode
        files does not exist
    """
    if not os.path.isfile(font_path):
        raise ValueError("Font file does not exist: " + font_path)

    if not os.path.isdir(unicode_path):
        raise ValueError(
            "Unicode directory does not exist or is not a directory " + unicode_path)

    required_file_names = [EMOJI_DATA_FILE,
                           EMOJI_ZWJ_FILE,
                           EMOJI_SEQ_FILE,
                           # read by create_test_data without the optional flag
                           EMOJI_VARIATION_SEQ_FILE,
                           # read by get_emoji_style_exceptions without the optional flag
                           EMOJI_STYLE_OVERRIDE_FILE]
    for file_name in required_file_names:
        emoji_filename = os.path.join(unicode_path, file_name)
        if not os.path.isfile(emoji_filename):
            raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)
437
438
def add_file_to_sha(sha_algo, file_path):
    """Feeds the content of a file into a hash object in 8192 byte chunks.

    :param sha_algo: hash object that exposes update(), e.g. hashlib.sha256()
    :param file_path: path of the file to read
    """
    with open(file_path, 'rb') as input_file:
        # A file opened in binary mode returns b'' at end of file. On Python 3 that never
        # equals the text sentinel '', which would make this loop run forever.
        for data in iter(lambda: input_file.read(8192), b''):
            sha_algo.update(data)
443
def create_sha_from_source_files(font_paths):
    """Returns the SHA-256 hex digest of the content of the given files, taken in order.

    :param font_paths: iterable of file paths; the files are hashed as one continuous stream
    """
    digest = hashlib.sha256()
    for source_path in font_paths:
        add_file_to_sha(digest, source_path)
    hex_digest = digest.hexdigest()
    return hex_digest
450
451
class EmojiFontCreator(object):
    """Creates the EmojiCompat font.

    The emoji list is read from the unicode files, glyph information is read from the source
    font, every emoji that has a glyph gets an id, and the resulting metadata is embedded into
    a copy of the font that is saved to FONT_PATH.
    """

    def __init__(self, font_path, unicode_path):
        """Validates the inputs and initializes an empty state.

        :param font_path: path to the source emoji font file
        :param unicode_path: path to the directory that contains the unicode files
        :raises ValueError: if validate_input_files rejects the given paths
        """
        validate_input_files(font_path, unicode_path)

        self.font_path = font_path
        self.unicode_path = unicode_path
        # space separated hex codepoints -> _EmojiData
        self.emoji_data_map = {}
        # emoji id -> glyph name; merged into the cmap format 12 table by create_font
        self.remapped_codepoints = {}
        # glyph name -> image metrics read from the CBDT table
        self.glyph_to_image_metrics_map = {}
        # next emoji id that will be assigned to an emoji without an id
        self.emoji_id = DEFAULT_EMOJI_ID

    def update_emoji_data(self, codepoints, glyph_name):
        """Updates the existing EmojiData identified with codepoints. The fields that are set
        are:
        - emoji_id (if it does not exist)
        - image width/height

        Codepoints that are not in emoji_data_map are ignored.

        :param codepoints: list of int codepoints of the emoji
        :param glyph_name: name of the glyph that renders the emoji
        """
        key = codepoint_to_string(codepoints)
        if key not in self.emoji_data_map:
            return

        emoji_data = self.emoji_data_map[key]
        emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
        # an id of 0 means that the emoji was not in the previous metadata
        if emoji_data.emoji_id == 0:
            emoji_data.emoji_id = self.emoji_id
            self.emoji_id = self.emoji_id + 1
        self.remapped_codepoints[emoji_data.emoji_id] = glyph_name

    def read_cbdt(self, ttf):
        """Read image size data from CBDT into glyph_to_image_metrics_map."""
        cbdt = ttf['CBDT']
        for strike_data in cbdt.strikeData:
            # items() exists on both Python 2 and Python 3 dictionaries
            for glyph_name, data in strike_data.items():
                data.decompile()
                self.glyph_to_image_metrics_map[glyph_name] = data.metrics

    def read_cmap12(self, ttf, glyph_to_codepoint_map):
        """Reads single code point emojis from the cmap table with format 12, platformID 3 and
        platEncID 10, and fills glyph_to_codepoint_map.

        :param ttf: the font to read
        :param glyph_to_codepoint_map: map of glyph name to codepoint, updated in place
        :return: the matching cmap table
        :raises ValueError: if the font has no such cmap table
        """
        cmap = ttf['cmap']
        for table in cmap.tables:
            if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
                for codepoint, glyph_name in table.cmap.items():
                    glyph_to_codepoint_map[glyph_name] = codepoint
                    self.update_emoji_data([codepoint], glyph_name)
                return table
        raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")

    def read_gsub(self, ttf, glyph_to_codepoint_map):
        """Reads the emoji sequences defined in GSUB.

        Only context (LookupType 5) and ligature (LookupType 4) subtables are processed; context
        subtables are handled first.
        """
        gsub = ttf['GSUB']
        ligature_subtables = []
        context_subtables = []
        # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
        # and would be expensive with little value
        for lookup in gsub.table.LookupList.Lookup:
            for subtable in lookup.SubTable:
                if subtable.LookupType == 5:
                    context_subtables.append(subtable)
                elif subtable.LookupType == 4:
                    ligature_subtables.append(subtable)

        for subtable in context_subtables:
            self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)

        for subtable in ligature_subtables:
            self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)

    def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Context Substitution"""
        for sub_class_set in subtable.SubClassSet:
            if not sub_class_set:
                continue
            for sub_class_rule in sub_class_set.SubClassRule:
                # prepare holder for substitution list. each rule will have a list that is added
                # to the subs_list.
                subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
                for record in sub_class_rule.SubstLookupRecord:
                    subs_list[record.SequenceIndex] = self.get_substitutions(
                        lookup_list, record.LookupListIndex)
                # create combinations of all lists. the combinations will be filtered by
                # emoji_data_map. the first non-empty output glyph of a combination is used as
                # the final glyph
                for seq in itertools.product(*subs_list):
                    glyph_names = [x["input"] for x in seq]
                    codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                    # a real list is needed: on Python 3 filter() returns an iterator that
                    # supports neither len() nor indexing
                    nonempty_outputs = [x["output"] for x in seq
                                        if x["output"] and x["output"].strip()]
                    if not nonempty_outputs:
                        print("Warning: no output glyph is set for " + str(glyph_names))
                        continue
                    if len(nonempty_outputs) > 1:
                        print(
                            "Warning: multiple glyph is set for "
                            + str(glyph_names) + ", will use the first one")

                    self.update_emoji_data(codepoints, nonempty_outputs[0])

    def get_substitutions(self, lookup_list, index):
        """Returns the glyph substitutions of the lookup at the given index.

        :return: list of {"input": glyph name, "output": glyph name} dictionaries, one for each
            mapping entry of each subtable of the lookup
        """
        result = []
        for subtable in lookup_list.Lookup[index].SubTable:
            for glyph_in, glyph_out in subtable.mapping.items():
                result.append({"input": glyph_in, "output": glyph_out})
        return result

    def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
        """Add substitutions defined as ligatures: first glyph plus components -> ligature."""
        for name, ligatures in subtable.ligatures.items():
            for ligature in ligatures:
                glyph_names = [name] + ligature.Component
                codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                self.update_emoji_data(codepoints, ligature.LigGlyph)

    def write_metadata_json(self, output_json_file_path):
        """Writes the emojis into a json file, sorted by emoji id.

        :param output_json_file_path: path of the json file to write
        :return: number of emojis written
        """
        emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)

        output_json = {}
        output_json['version'] = METADATA_VERSION
        output_json['sourceSha'] = create_sha_from_source_files(
            [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA])
        output_json['list'] = [emoji_data.create_json_element()
                               for emoji_data in emoji_data_list]

        # write the new json file to be processed by FlatBuffers
        with open(output_json_file_path, 'w') as json_file:
            print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
                  file=json_file)

        return len(emoji_data_list)

    def write_metadata_csv(self):
        """Writes emoji metadata into the space separated OUTPUT_META_FILE, sorted by id."""
        with open(OUTPUT_META_FILE, 'w') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=' ')
            emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
            csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
            for emoji_data in emoji_data_list:
                csvwriter.writerow(emoji_data.create_txt_row())

    def create_font(self):
        """Creates the EmojiCompat font from the font and unicode paths given to __init__.

        Besides the font (FONT_PATH) this also rewrites the metadata file, the flatbuffer java
        files and the test data file. Requires the flatc executable to be on the PATH.

        :raises subprocess.CalledProcessError: if flatc exits with a non-zero status
        """
        # imported here because this is the only user of the module in this file
        import subprocess

        tmp_dir = tempfile.mkdtemp()
        try:
            # create emoji codepoints to EmojiData map
            self.emoji_data_map = load_emoji_data_map(self.unicode_path)

            # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that
            # is returned is either default or 1 greater than the largest id in previous data
            self.emoji_id = load_previous_metadata(self.emoji_data_map)

            # recalcTimestamp parameter will keep the modified field same as the original font.
            # Changing the modified field in the font causes the font ttf file to change, which
            # makes it harder to understand if something really changed in the font.
            with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
                # read image size data
                self.read_cbdt(ttf)

                # glyph name to codepoint map
                glyph_to_codepoint_map = {}

                # read single codepoint emojis under cmap12
                cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)

                # read emoji sequences from gsub
                self.read_gsub(ttf, glyph_to_codepoint_map)

                # add all new codepoint to glyph mappings
                cmap12_table.cmap.update(self.remapped_codepoints)

                # final metadata csv will be used to generate the sha, therefore write it before
                # metadata json is written.
                self.write_metadata_csv()

                output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
                flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
                flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)

                total_emoji_count = self.write_metadata_json(output_json_file)

                # create the flatbuffers binary and java classes. An argument list (no shell)
                # keeps paths with spaces intact, and check_call stops the run when flatc fails
                # instead of continuing without its output.
                subprocess.check_call(
                    ['flatc', '-o', tmp_dir, '-b', '-j', FLATBUFFER_SCHEMA, output_json_file])

                # inject metadata binary into font
                inject_meta_into_font(ttf, flatbuffer_bin_file)

                # update CBDT and CBLC versions since older android versions cannot read > 2.0
                ttf['CBDT'].version = 2.0
                ttf['CBLC'].version = 2.0

                # save the new font
                ttf.save(FONT_PATH)

                update_flatbuffer_java_files(flatbuffer_java_dir)

                create_test_data(self.unicode_path)
        finally:
            # clear the tmp output directory, also when one of the steps above failed
            shutil.rmtree(tmp_dir, ignore_errors=True)

        print(
            "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))
665
def print_usage():
    """Prints the command line usage of the script to standard output."""
    usage_lines = (
        "Please specify a path to font and unicode files.",
        "usage: createfont.py noto-color-emoji-path unicode-dir-path",
    )
    print("\n".join(usage_lines))
670
671
if __name__ == '__main__':
    # Two positional arguments are required: the source font path and the unicode directory.
    if len(sys.argv) >= 3:
        font_argument, unicode_argument = sys.argv[1], sys.argv[2]
        EmojiFontCreator(font_argument, unicode_argument).create_font()
    else:
        print_usage()
        sys.exit(1)
677