#!/usr/bin/python
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
under a meta tag with name 'Emji'.

The following inputs are used to create the final font:

- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
external/noto-fonts/emoji/NotoColorEmoji.ttf

- Unicode files: Unicode files that are in the framework and list information about all the
emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
and emoji-variation-sequences.txt. Currently at external/unicode/.

- additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are
in the Android font. Resides in framework and currently under external/unicode/.

- data/emoji_metadata.txt: The file that includes the id, the first Android OS version that the
emoji was added (sdkAdded), the first EmojiCompat font version that the emoji was added
(compatAdded), and finally the codepoints. Updated when the script is executed.

- data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.

After execution the following files are created if they do not exist; otherwise they are updated:
- font/NotoColorEmojiCompat.ttf
- supported-emojis/emojis.txt
- data/emoji_metadata.txt
- src/java/androidx/text/emoji/flatbuffer/*
"""

from __future__ import print_function

import contextlib
import csv
import hashlib
import itertools
import json
import os
import shutil
import sys
import tempfile
from fontTools import ttLib

########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
# Last Android SDK Version
SDK_VERSION = 28
# metadata version that will be embedded into font. If there are updates to the font that would
# cause data/emoji_metadata.txt to change, this integer number should be incremented. This number
# defines in which EmojiCompat metadata version the emoji is added to the font.
METADATA_VERSION = 3

####### main directories where output files are created #######
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
JAVA_SRC_DIR = os.path.join(SCRIPT_DIR, 'src', 'java')
####### output files #######
# font file
FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
# emoji metadata output file (space separated text: id, sdkAdded, compatAdded, codepoints)
OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
# emojis test file
TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
####### input files #######
# Unicode file names to read emoji data
EMOJI_DATA_FILE = 'emoji-data.txt'
EMOJI_SEQ_FILE = 'emoji-sequences.txt'
EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
# Android OS emoji files for emojis that are not in Unicode files; both paths are relative to the
# unicode directory.
ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
# Android OS emoji style override file. Codepoints that are rendered with emoji style by default
# even though not defined so in <code>emoji-data.txt</code>.
EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
# emoji metadata input file; deliberately the same path as OUTPUT_META_FILE, so the previous
# metadata is read from, and the new metadata is written to, the same file
INPUT_META_FILE = OUTPUT_META_FILE
# flatbuffer schema
FLATBUFFER_SCHEMA = os.path.join(DATA_DIR, 'emoji_metadata.fbs')
# file path for java header, it will be prepended to flatbuffer java files
FLATBUFFER_HEADER = os.path.join(DATA_DIR, "flatbuffer_header.txt")
# file name of the temporary emoji metadata json output file
OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
# file name of the temporary binary file generated by flatbuffer
FLATBUFFER_BIN = 'emoji_metadata.bin'
# directory representation for flatbuffer java package. The trailing empty component makes the
# path end with a separator.
FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
# path, relative to the temporary output directory, that contains the generated flatbuffer java
# files. The single-argument join returns FLATBUFFER_PACKAGE_PATH unchanged.
FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
# directory under source where flatbuffer java files will be copied into
FLATBUFFER_JAVA_TARGET = os.path.join(JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
# meta tag name used in the font to embed the emoji metadata. This value is also used in
# MetadataListReader.java in order to locate the metadata location.
EMOJI_META_TAG_NAME = 'Emji'

# Property names as they appear after read_emoji_lines() upper-cases every line.
EMOJI_STR = 'EMOJI'
EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
ACCEPTED_EMOJI_PROPERTIES = [EMOJI_PRESENTATION_STR, EMOJI_STR]
# Marker searched for in the (upper-cased) lines of emoji-variation-sequences.txt.
STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'

# First emoji id that is handed out; start of Supplemental Private Use Area-A.
DEFAULT_EMOJI_ID = 0xF0001
# Variation selector that requests emoji style presentation.
EMOJI_STYLE_VS = 0xFE0F


def to_hex_str(value):
    """Converts given int value to upper case hex without the 0x prefix."""
    return format(value, 'X')


def hex_str_to_int(string):
    """Converts a hex string (with or without 0x prefix) into int."""
    return int(string, 16)


def codepoint_to_string(codepoints):
    """Converts an iterable of int codepoints into a space separated upper case hex string."""
    return ' '.join(to_hex_str(x) for x in codepoints)


def _parse_sequence_codepoints(line):
    """Returns the int codepoints in the first ';' separated field of a sequence file line.

    Any run of whitespace separates two codepoints, so repeated spaces or tabs are accepted.
    """
    return [hex_str_to_int(x) for x in line.split(';')[0].split()]


def prepend_header_to_file(file_path):
    """Replaces everything before "public final class" in the file with the java header.

    Used to update flatbuffer java files with header, comments and annotations. The header is
    read from FLATBUFFER_HEADER.

    :param file_path: path of the generated java file, updated in place
    :raises ValueError: if the file does not contain "public final class"
    """
    with open(FLATBUFFER_HEADER, "r") as copyright_file:
        header = copyright_file.read()

    with open(file_path, "r+") as original_file:
        original_content = original_file.read()
        start_index = original_content.index("public final class")
        original_file.seek(0)
        original_file.write(header + "\n" + original_content[start_index:])
        # The new content can be shorter than the old one; without truncating, the tail of the
        # old content would be left at the end of the file.
        original_file.truncate()


def update_flatbuffer_java_files(flatbuffer_java_dir):
    """Prepends headers to flatbuffer java files and copies them to FLATBUFFER_JAVA_TARGET.

    :param flatbuffer_java_dir: directory that contains the generated MetadataList.java and
    MetadataItem.java files
    """
    file_names = [FLATBUFFER_METADATA_LIST_JAVA, FLATBUFFER_METADATA_ITEM_JAVA]
    # os.path.join gives the same result whether or not the directory ends with a separator
    tmp_files = [os.path.join(flatbuffer_java_dir, name) for name in file_names]

    for tmp_file in tmp_files:
        prepend_header_to_file(tmp_file)

    if not os.path.exists(FLATBUFFER_JAVA_TARGET):
        os.makedirs(FLATBUFFER_JAVA_TARGET)

    for tmp_file, name in zip(tmp_files, file_names):
        shutil.copy(tmp_file, os.path.join(FLATBUFFER_JAVA_TARGET, name))


def create_test_data(unicode_path):
    """Reads all the emojis in the unicode files and rewrites the test file TEST_DATA_PATH.

    The test file contains one emoji per line as space separated upper case hex codepoints,
    sorted as strings.

    :param unicode_path: directory that contains the unicode emoji files
    """
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    lines += read_emoji_lines(os.path.join(unicode_path, EMOJI_SEQ_FILE))

    lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE),
                              optional=True)
    lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE),
                              optional=True)

    # standardized variants contains a huge list of sequences, only read the ones that are
    # marked as emoji style
    standardized_variants_lines = read_emoji_lines(
        os.path.join(unicode_path, EMOJI_VARIATION_SEQ_FILE))
    lines += [line for line in standardized_variants_lines if STD_VARIANTS_EMOJI_STYLE in line]

    # unlike read_emoji_sequences, the emoji style variation selector is kept here
    emojis_set = set(codepoint_to_string(_parse_sequence_codepoints(line)) for line in lines)

    # single codepoints are only added when they have emoji presentation by default
    for line in read_emoji_lines(os.path.join(unicode_path, EMOJI_DATA_FILE)):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if emoji_property != EMOJI_PRESENTATION_STR:
            continue
        emojis_set.update(to_hex_str(x) for x in codepoints_for_emojirange(codepoints_range))

    # finally add the android default emoji exceptions
    emojis_set.update(to_hex_str(x) for x in get_emoji_style_exceptions(unicode_path))

    with open(TEST_DATA_PATH, "w") as test_file:
        for emoji in sorted(emojis_set):
            test_file.write("%s\n" % emoji)


class _EmojiData(object):
    """Holds the information about a single emoji."""

    def __init__(self, codepoints, is_emoji_style):
        """
        :param codepoints: list of int codepoints that form the emoji
        :param is_emoji_style: True if the emoji is rendered with emoji style by default
        """
        self.codepoints = codepoints
        self.emoji_style = is_emoji_style
        # 0 means that no id is assigned yet
        self.emoji_id = 0
        self.width = 0
        self.height = 0
        # a new emoji is assumed to be added in the current sdk and metadata version, the
        # values are overwritten by update() when the emoji exists in the previous metadata
        self.sdk_added = SDK_VERSION
        self.compat_added = METADATA_VERSION

    def update_metrics(self, metrics):
        """Updates width/height instance variables with the values given in metrics.
        :param metrics: an object that has width and height attributes.
        """
        self.width = metrics.width
        self.height = metrics.height

    def __repr__(self):
        return '<EmojiData {0} - {1}>'.format(self.emoji_style,
                                              codepoint_to_string(self.codepoints))

    def create_json_element(self):
        """Creates the dictionary that is serialized as the json representation of EmojiData."""
        return {
            'id': self.emoji_id,
            'emojiStyle': self.emoji_style,
            'sdkAdded': self.sdk_added,
            'compatAdded': self.compat_added,
            'width': self.width,
            'height': self.height,
            'codepoints': self.codepoints,
        }

    def create_txt_row(self):
        """Creates array of values for CSV of EmojiData: id, sdkAdded, compatAdded and then
        the codepoints. Id and codepoints are written as hex strings."""
        row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
        row += [to_hex_str(x) for x in self.codepoints]
        return row

    def update(self, emoji_id, sdk_added, compat_added):
        """Overwrites id, sdk_added and compat_added of the current EmojiData."""
        self.emoji_id = emoji_id
        self.sdk_added = sdk_added
        self.compat_added = compat_added


def read_emoji_lines(file_path, optional=False):
    """Read all lines in an unicode emoji file into a list of uppercase strings. Ignore the empty
    lines and comments
    :param file_path: unicode emoji file path
    :param optional: if True no exception is raised when the file cannot be read
    :return: list of uppercase strings
    """
    result = []
    try:
        with open(file_path) as file_stream:
            for line in file_stream:
                line = line.strip()
                if line and not line.startswith('#'):
                    result.append(line.upper())
    except IOError:
        # an unreadable optional file contributes no lines
        if not optional:
            raise

    return result


def get_emoji_style_exceptions(unicode_path):
    """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers.

    A line can define a single codepoint or a range such as 1F93C..1F93E, a range is expanded
    into all of its codepoints.

    :param unicode_path: directory that contains the unicode emoji files
    :return: list of int codepoints
    """
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE))
    exceptions = []
    for line in lines:
        exceptions.extend(codepoints_for_emojirange(codepoints_and_emoji_prop(line)[0]))
    return exceptions


def codepoints_for_emojirange(codepoints_range):
    """Return codepoints given in emoji files. Expand the codepoints that are given as an
    inclusive range such as 1F93C..1F93E.
    :param codepoints_range: a single hex codepoint or two hex codepoints separated with '..'
    :return: list of int codepoints
    """
    if '..' in codepoints_range:
        range_start, range_end = codepoints_range.split('..')
        # the end of the range is inclusive
        return list(range(hex_str_to_int(range_start), hex_str_to_int(range_end) + 1))
    return [hex_str_to_int(codepoints_range)]


def codepoints_and_emoji_prop(line):
    """For a given emoji file line, return codepoints and emoji property in the line.
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]

    :return: tuple of (codepoints or codepoint range string, emoji property string)
    :raises ValueError: if the line does not contain #
    """
    line = line.strip()
    if '#' not in line:
        raise ValueError("Line is expected to have # in it")
    fields = line[:line.index('#')].split(';')
    codepoints_range = fields[0].strip()
    emoji_property = fields[1].strip()

    return codepoints_range, emoji_property


def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
    """Read unicode lines of unicode emoji file in which each line describes a set of codepoint
    intervals. Expands the interval on a line and inserts related EmojiDatas into emoji_data_map.
    Lines with a property that is not in ACCEPTED_EMOJI_PROPERTIES are skipped.
    A line format that is expected is as follows:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base|Emoji_Component
    |Extended_Pictographic] # [...]

    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :param file_path: unicode emoji file path
    :param emoji_style_exceptions: int codepoints that are always marked as emoji style
    """
    # membership is tested for every codepoint, build the set once
    emoji_style_exceptions = set(emoji_style_exceptions)

    for line in read_emoji_lines(file_path):
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if emoji_property not in ACCEPTED_EMOJI_PROPERTIES:
            continue
        is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR

        for codepoint in codepoints_for_emojirange(codepoints_range):
            key = codepoint_to_string([codepoint])
            codepoint_is_emoji_style = is_emoji_style or codepoint in emoji_style_exceptions
            if key not in emoji_data_map:
                emoji_data_map[key] = _EmojiData([codepoint], codepoint_is_emoji_style)
            elif codepoint_is_emoji_style:
                # since there are multiple definitions of emojis, only update when emoji style
                # is True
                emoji_data_map[key].emoji_style = True


def read_emoji_sequences(emoji_data_map, file_path, optional=False):
    """Reads the content of the file which contains emoji sequences. Creates EmojiData for each
    line and puts into emoji_data_map. The emoji style variation selector (FE0F) is removed from
    the sequences, and a sequence that is already in the map is left untouched.

    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :param file_path: unicode emoji sequences file path
    :param optional: if True no exception is raised when the file cannot be read
    """
    # 1F1E6 1F1E8 ; Name ; [...]
    for line in read_emoji_lines(file_path, optional):
        codepoints = [x for x in _parse_sequence_codepoints(line) if x != EMOJI_STYLE_VS]
        key = codepoint_to_string(codepoints)
        if key not in emoji_data_map:
            emoji_data_map[key] = _EmojiData(codepoints, False)


def load_emoji_data_map(unicode_path):
    """Reads the emoji data files, constructs a map of space separated codepoints to EmojiData.
    :param unicode_path: directory that contains the unicode emoji files
    :return: map of space separated codepoints to EmojiData
    """
    emoji_data_map = {}
    emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    read_emoji_intervals(emoji_data_map, os.path.join(unicode_path, EMOJI_DATA_FILE),
                         emoji_style_exceptions)
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_SEQ_FILE))

    # Add the optional ANDROID_EMOJI_ZWJ_SEQ_FILE if it exists.
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE),
                         optional=True)
    # Add the optional ANDROID_EMOJIS_SEQ_FILE if it exists.
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE),
                         optional=True)

    return emoji_data_map


def load_previous_metadata(emoji_data_map):
    """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added fields
    in emoji_metadata.txt. Returns the smallest available emoji id to use, which is never smaller
    than DEFAULT_EMOJI_ID: if the largest matching emoji id in emoji_metadata.txt is F0001 the
    function returns F0002. If emoji_metadata.txt does not exist, or none of its rows matches an
    emoji in emoji_data_map, returns DEFAULT_EMOJI_ID.

    :param emoji_data_map: map of space separated codepoints to EmojiData, updated in place
    :return: the next emoji id to assign
    """
    current_emoji_id = DEFAULT_EMOJI_ID
    if not os.path.isfile(INPUT_META_FILE):
        return current_emoji_id

    with open(INPUT_META_FILE) as csvfile:
        for row in csv.reader(csvfile, delimiter=' '):
            # csv.reader returns an empty list for a blank line
            if not row or row[0].startswith('#'):
                continue
            emoji_id = hex_str_to_int(row[0])
            sdk_added = int(row[1])
            compat_added = int(row[2])
            # a trailing delimiter produces an empty field, which is not a codepoint
            key = codepoint_to_string(hex_str_to_int(x) for x in row[3:] if x)
            if key in emoji_data_map:
                emoji_data = emoji_data_map[key]
                emoji_data.update(emoji_id, sdk_added, compat_added)
                if emoji_data.emoji_id >= current_emoji_id:
                    current_emoji_id = emoji_data.emoji_id + 1

    return current_emoji_id


def update_ttlib_orig_sort():
    """Updates the ttLib tag sort with a closure that makes the meta table first."""
    orig_sort = ttLib.sortedTagList

    def meta_first_table_sort(tag_list, table_order=None):
        """Sorts the tables with the original ttLib sort, then makes the meta table first."""
        tag_list = orig_sort(tag_list, table_order)
        # the replaced sort is process wide; tolerate a tag list without a meta table
        if 'meta' in tag_list:
            tag_list.remove('meta')
            tag_list.insert(0, 'meta')
        return tag_list

    ttLib.sortedTagList = meta_first_table_sort


def inject_meta_into_font(ttf, flatbuffer_bin_filename):
    """Injects the metadata binary into the meta table of the font under EMOJI_META_TAG_NAME.
    The meta table is created if the font does not have one.

    :param ttf: the font object to update
    :param flatbuffer_bin_filename: path of the binary file generated by flatbuffer
    """
    if 'meta' not in ttf:
        ttf['meta'] = ttLib.getTableClass('meta')()
    meta = ttf['meta']
    # the flatbuffer output is binary data, it must not go through text decoding or newline
    # translation
    with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
        meta.data[EMOJI_META_TAG_NAME] = flatbuffer_bin_file.read()

    # sort meta tables for faster access
    update_ttlib_orig_sort()


def validate_input_files(font_path, unicode_path):
    """Validate the existence of font file and the unicode files.
    :raises ValueError: if the font file, the unicode directory, or one of the required unicode
    emoji files does not exist
    """
    if not os.path.isfile(font_path):
        raise ValueError("Font file does not exist: " + font_path)

    if not os.path.isdir(unicode_path):
        raise ValueError(
            "Unicode directory does not exist or is not a directory " + unicode_path)

    for file_name in (EMOJI_DATA_FILE, EMOJI_ZWJ_FILE, EMOJI_SEQ_FILE):
        emoji_filename = os.path.join(unicode_path, file_name)
        if not os.path.isfile(emoji_filename):
            raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)


def add_file_to_sha(sha_algo, file_path):
    """Feeds the content of the file into the given hash object in 8192 byte chunks.
    :param sha_algo: hash object, updated in place
    :param file_path: path of the file to read
    """
    with open(file_path, 'rb') as input_file:
        # The file is read as bytes, therefore the end of file sentinel has to be b''. A str
        # sentinel never compares equal to bytes on Python 3 and the loop would not terminate.
        for data in iter(lambda: input_file.read(8192), b''):
            sha_algo.update(data)


def create_sha_from_source_files(font_paths):
    """Creates a single SHA-256 hex digest from the content of the given files, in order."""
    sha_algo = hashlib.sha256()
    for file_path in font_paths:
        add_file_to_sha(sha_algo, file_path)
    return sha_algo.hexdigest()


class EmojiFontCreator(object):
    """Creates the EmojiCompat font"""

    def __init__(self, font_path, unicode_path):
        """
        :param font_path: path to Android NotoColorEmoji font
        :param unicode_path: path to directory that contains unicode files
        :raises ValueError: if one of the input files does not exist
        """
        validate_input_files(font_path, unicode_path)

        self.font_path = font_path
        self.unicode_path = unicode_path
        # space separated codepoints to EmojiData
        self.emoji_data_map = {}
        # emoji id to glyph name
        self.remapped_codepoints = {}
        # glyph name to image metrics read from CBDT
        self.glyph_to_image_metrics_map = {}
        # set default emoji id to start of Supplemental Private Use Area-A
        self.emoji_id = DEFAULT_EMOJI_ID

    def update_emoji_data(self, codepoints, glyph_name):
        """Updates the existing EmojiData identified with codepoints. The fields that are set are:
        - emoji_id (if it does not exist)
        - image width/height
        Codepoints that are not in emoji_data_map are ignored."""
        key = codepoint_to_string(codepoints)
        if key not in self.emoji_data_map:
            return

        # add emoji to final data
        emoji_data = self.emoji_data_map[key]
        emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
        if emoji_data.emoji_id == 0:
            emoji_data.emoji_id = self.emoji_id
            self.emoji_id = self.emoji_id + 1
        self.remapped_codepoints[emoji_data.emoji_id] = glyph_name

    def read_cbdt(self, ttf):
        """Read image size data from CBDT into glyph_to_image_metrics_map."""
        cbdt = ttf['CBDT']
        for strike_data in cbdt.strikeData:
            for key, data in strike_data.items():
                data.decompile()
                self.glyph_to_image_metrics_map[key] = data.metrics

    def read_cmap12(self, ttf, glyph_to_codepoint_map):
        """Reads single code point emojis that are in cmap12 and updates glyph_to_codepoint_map.
        The contents of the table are not modified here.

        :return: the first cmap table with format:12, platformID:3 and platEncID:10
        :raises ValueError: if the font does not contain such a cmap table
        """
        cmap = ttf['cmap']
        for table in cmap.tables:
            if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
                for codepoint, glyph_name in table.cmap.items():
                    glyph_to_codepoint_map[glyph_name] = codepoint
                    self.update_emoji_data([codepoint], glyph_name)
                return table
        raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")

    def read_gsub(self, ttf, glyph_to_codepoint_map):
        """Reads the emoji sequences defined in GSUB. Only the context (type 5) and ligature
        (type 4) subtables are processed, context subtables first. The contents of GSUB are not
        modified here."""
        gsub = ttf['GSUB']
        ligature_subtables = []
        context_subtables = []
        # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
        # and would be expensive with little value
        for lookup in gsub.table.LookupList.Lookup:
            for subtable in lookup.SubTable:
                if subtable.LookupType == 5:
                    context_subtables.append(subtable)
                elif subtable.LookupType == 4:
                    ligature_subtables.append(subtable)

        for subtable in context_subtables:
            self.add_gsub_context_subtable(subtable, gsub.table.LookupList,
                                           glyph_to_codepoint_map)

        for subtable in ligature_subtables:
            self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)

    def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Context Substitution"""
        for sub_class_set in subtable.SubClassSet:
            if not sub_class_set:
                continue
            for sub_class_rule in sub_class_set.SubClassRule:
                # prepare holder for substitution list. each rule will have a list that is added
                # to the subs_list.
                subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
                for record in sub_class_rule.SubstLookupRecord:
                    subs_list[record.SequenceIndex] = self.get_substitutions(
                        lookup_list, record.LookupListIndex)
                # create combinations of all lists. the combinations will be filtered by
                # emoji_data_map. the first non empty output glyph of a combination is used as
                # the final glyph
                for seq in itertools.product(*subs_list):
                    glyph_names = [x["input"] for x in seq]
                    codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                    # a real list is needed: it is measured and indexed below
                    nonempty_outputs = [x["output"] for x in seq
                                        if x["output"] and x["output"].strip()]
                    if not nonempty_outputs:
                        print("Warning: no output glyph is set for " + str(glyph_names))
                        continue
                    if len(nonempty_outputs) > 1:
                        print(
                            "Warning: multiple glyph is set for "
                            + str(glyph_names) + ", will use the first one")

                    self.update_emoji_data(codepoints, nonempty_outputs[0])

    def get_substitutions(self, lookup_list, index):
        """Returns the substitutions of all subtables of the lookup at the given index.
        :return: list of dictionaries with "input" and "output" glyph names
        """
        result = []
        for subtable in lookup_list.Lookup[index].SubTable:
            for input_glyph, output_glyph in subtable.mapping.items():
                result.append({"input": input_glyph, "output": output_glyph})
        return result

    def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Ligature Substitution"""
        for name, ligatures in subtable.ligatures.items():
            for ligature in ligatures:
                glyph_names = [name] + ligature.Component
                codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                self.update_emoji_data(codepoints, ligature.LigGlyph)

    def write_metadata_json(self, output_json_file_path):
        """Writes the emojis, sorted by emoji id, into a json file.
        :return: number of emojis written
        """
        emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)

        output_json = {
            'version': METADATA_VERSION,
            'sourceSha': create_sha_from_source_files(
                [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA]),
            'list': [emoji_data.create_json_element() for emoji_data in emoji_data_list],
        }

        # write the new json file to be processed by FlatBuffers
        with open(output_json_file_path, 'w') as json_file:
            print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
                  file=json_file)

        return len(emoji_data_list)

    def write_metadata_csv(self):
        """Writes emoji metadata, sorted by emoji id, into the space separated OUTPUT_META_FILE"""
        emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
        with open(OUTPUT_META_FILE, 'w') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=' ')
            csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
            for emoji_data in emoji_data_list:
                csvwriter.writerow(emoji_data.create_txt_row())

    def _generate_outputs(self, tmp_dir):
        """Generates the font, metadata, java and test files, intermediate files go to tmp_dir.
        :return: number of emojis written into the metadata
        """
        # imported here since the module does not import subprocess at the top of the file
        import subprocess

        # create emoji codepoints to EmojiData map
        self.emoji_data_map = load_emoji_data_map(self.unicode_path)

        # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is
        # returned is either default or 1 greater than the largest id in previous data
        self.emoji_id = load_previous_metadata(self.emoji_data_map)

        # recalcTimestamp parameter will keep the modified field same as the original font.
        # Changing the modified field in the font causes the font ttf file to change, which makes
        # it harder to understand if something really changed in the font.
        with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
            # read image size data
            self.read_cbdt(ttf)

            # glyph name to codepoint map
            glyph_to_codepoint_map = {}

            # read single codepoint emojis under cmap12
            cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)

            # read emoji sequences under gsub
            self.read_gsub(ttf, glyph_to_codepoint_map)

            # add all new codepoint to glyph mappings
            cmap12_table.cmap.update(self.remapped_codepoints)

            # final metadata csv will be used to generate the sha, therefore write it before
            # metadata json is written.
            self.write_metadata_csv()

            output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
            flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
            flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)

            total_emoji_count = self.write_metadata_json(output_json_file)

            # create the flatbuffers binary and java classes. The arguments are passed as a list
            # so that paths with spaces work, and a failing flatc stops the script instead of
            # being ignored.
            subprocess.check_call(
                ['flatc', '-o', tmp_dir, '-b', '-j', FLATBUFFER_SCHEMA, output_json_file])

            # inject metadata binary into font
            inject_meta_into_font(ttf, flatbuffer_bin_file)

            # update CBDT and CBLC versions since older android versions cannot read > 2.0
            ttf['CBDT'].version = 2.0
            ttf['CBLC'].version = 2.0

            # save the new font
            ttf.save(FONT_PATH)

        update_flatbuffer_java_files(flatbuffer_java_dir)

        create_test_data(self.unicode_path)

        return total_emoji_count

    def create_font(self):
        """Creates the EmojiCompat font from the font and the unicode files that are given to the
        constructor. Updates FONT_PATH, OUTPUT_META_FILE, TEST_DATA_PATH and the flatbuffer java
        files under FLATBUFFER_JAVA_TARGET.

        :raises subprocess.CalledProcessError: if flatc exits with a non-zero status
        """
        tmp_dir = tempfile.mkdtemp()
        try:
            total_emoji_count = self._generate_outputs(tmp_dir)
        finally:
            # clear the tmp output directory, also when the generation fails
            shutil.rmtree(tmp_dir, ignore_errors=True)

        print(
            "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))


def print_usage():
    """Prints how to use the script."""
    print("Please specify a path to font and unicode files.\n"
          "usage: createfont.py noto-color-emoji-path unicode-dir-path")


if __name__ == '__main__':
    if len(sys.argv) < 3:
        print_usage()
        sys.exit(1)
    EmojiFontCreator(sys.argv[1], sys.argv[2]).create_font()