1# Copyright (C) 2018 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html
3
4# Python 2/3 Compatibility (ICU-20299)
5# TODO(ICU-20301): Remove this.
6from __future__ import print_function
7
8from icutools.databuilder import *
9from icutools.databuilder import utils
10from icutools.databuilder.request_types import *
11
12import os
13import sys
14
15
def generate(config, io, common_vars):
    """Build the complete, ordered list of data build requests.

    Args:
        config: Build configuration. This function reads
            ``config.use_pool_bundle`` and forwards the object to every
            ``generate_*`` helper.
        io: Object providing ``glob()``; used here to verify that the data
            source directory can be found, and forwarded to the helpers.
        common_vars: Shared format variables, forwarded unchanged to the
            helpers.

    Returns:
        A list of request objects in the order the helpers are called below,
        ending with the ``icudata_list`` ListRequest.

    Raises:
        SystemExit: With status 1 when ``misc/*`` matches no files.
    """
    requests = []

    if not io.glob("misc/*"):
        print("Error: Cannot find data directory; please specify --src_dir", file=sys.stderr)
        # Use sys.exit() rather than the exit() helper injected by the site
        # module, which is not guaranteed to be defined (e.g. "python -S").
        sys.exit(1)

    requests += generate_cnvalias(config, io, common_vars)
    requests += generate_ulayout(config, io, common_vars)
    requests += generate_confusables(config, io, common_vars)
    requests += generate_conversion_mappings(config, io, common_vars)
    requests += generate_brkitr_brk(config, io, common_vars)
    requests += generate_stringprep(config, io, common_vars)
    requests += generate_brkitr_dictionaries(config, io, common_vars)
    requests += generate_normalization(config, io, common_vars)
    requests += generate_coll_ucadata(config, io, common_vars)
    requests += generate_full_unicore_data(config, io, common_vars)
    requests += generate_unames(config, io, common_vars)
    requests += generate_misc(config, io, common_vars)
    requests += generate_curr_supplemental(config, io, common_vars)
    requests += generate_zone_supplemental(config, io, common_vars)
    requests += generate_translit(config, io, common_vars)

    # Res Tree Files
    # Positional arguments after common_vars:
    # (input sub-directory, output sub-directory, use pool bundle, dependency targets)
    requests += generate_tree(config, io, common_vars,
        "locales",
        None,
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "curr",
        "curr",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "lang",
        "lang",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "region",
        "region",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "zone",
        "zone",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "unit",
        "unit",
        config.use_pool_bundle,
        [])

    requests += generate_tree(config, io, common_vars,
        "coll",
        "coll",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        # Depends on timezoneTypes.res and keyTypeData.res.
        # TODO: We should not need this dependency to build collation.
        # TODO: Bake keyTypeData.res into the common library?
        [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")])

    requests += generate_tree(config, io, common_vars,
        "brkitr",
        "brkitr",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        [DepTarget("brkitr_brk"), DepTarget("dictionaries")])

    requests += generate_tree(config, io, common_vars,
        "rbnf",
        "rbnf",
        # Never use pool bundle for coll, brkitr, or rbnf
        False,
        [])

    # Final request: a listing written to the temporary file icudata.lst.
    requests += [
        ListRequest(
            name = "icudata_list",
            variable_name = "icudata_all_output_files",
            output_file = TmpFile("icudata.lst"),
            include_tmp = False
        )
    ]

    return requests
111
112
def generate_cnvalias(config, io, common_vars):
    """Return the request that builds the converter name alias file.

    One ``gencnval`` invocation takes ``mappings/convrtrs.txt`` as input and
    declares ``cnvalias.icu`` as output. None of the three parameters is
    used by this function.

    Returns:
        A one-element list holding the "cnvalias" SingleExecutionRequest.
    """
    # UConv Name Aliases
    alias_request = SingleExecutionRequest(
        name = "cnvalias",
        category = "cnvalias",
        dep_targets = [],
        input_files = [InFile("mappings/convrtrs.txt")],
        output_files = [OutFile("cnvalias.icu")],
        tool = IcuTool("gencnval"),
        args = "-s {IN_DIR} -d {OUT_DIR} {INPUT_FILES[0]}",
        format_with = {}
    )
    return [alias_request]
130
131
def generate_confusables(config, io, common_vars):
    """Return the request that builds ``confusables.cfu``.

    One ``gencfu`` invocation receives ``unidata/confusables.txt`` via ``-r``
    and ``unidata/confusablesWholeScript.txt`` via ``-w``. The request
    depends on the "cnvalias" target. None of the three parameters is used
    by this function.

    Returns:
        A one-element list holding the "confusables" SingleExecutionRequest.
    """
    # CONFUSABLES
    sources = [
        InFile("unidata/confusables.txt"),
        InFile("unidata/confusablesWholeScript.txt")
    ]
    compiled = OutFile("confusables.cfu")
    tool_args = (
        "-d {OUT_DIR} -i {OUT_DIR} "
        "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]} "
        "-o {OUTPUT_FILES[0]}"
    )
    return [
        SingleExecutionRequest(
            name = "confusables",
            category = "confusables",
            dep_targets = [DepTarget("cnvalias")],
            input_files = sources,
            output_files = [compiled],
            tool = IcuTool("gencfu"),
            args = tool_args,
            format_with = {}
        )
    ]
151
152
def generate_conversion_mappings(config, io, common_vars):
    """Return the request that builds the converter table (.cnv) files.

    Every file matching ``mappings/*.ucm`` becomes an input; the matching
    output is the same base name with a ``.cnv`` extension and no directory.
    ``config`` and ``common_vars`` are not used by this function.

    Returns:
        A one-element list holding the "conversion_mappings"
        RepeatedOrSingleExecutionRequest.
    """
    # UConv Conversion Table Files
    dir_len = len("mappings/")
    ext_len = len(".ucm")
    ucm_files = [InFile(path) for path in io.glob("mappings/*.ucm")]
    cnv_files = [
        OutFile("%s.cnv" % ucm.filename[dir_len:-ext_len])
        for ucm in ucm_files
    ]
    # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv
    ucm_names = utils.SpaceSeparatedList(ucm.filename for ucm in ucm_files)
    return [
        RepeatedOrSingleExecutionRequest(
            name = "conversion_mappings",
            category = "conversion_mappings",
            dep_targets = [],
            input_files = ucm_files,
            output_files = cnv_files,
            tool = IcuTool("makeconv"),
            # BEGIN android-changed
            # args = "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}",
            args = "-s {IN_DIR} -d {OUT_DIR} -c --small {INPUT_FILE_PLACEHOLDER}",
            # END android-changed
            format_with = {},
            repeat_with = {
                "INPUT_FILE_PLACEHOLDER": ucm_names
            }
        )
    ]
176
177
def generate_brkitr_brk(config, io, common_vars):
    """Return the request that builds the break iterator rule (.brk) files.

    Each file matching ``brkitr/rules/*.txt`` is an input to ``genbrk``; its
    output is ``brkitr/<base name>.brk``. The request depends on the
    "cnvalias" and "ulayout" targets. ``config`` and ``common_vars`` are not
    used by this function.

    Returns:
        A one-element list holding the "brkitr_brk" RepeatedExecutionRequest.
    """
    # BRK Files
    dir_len = len("brkitr/rules/")
    ext_len = len(".txt")
    rule_files = [InFile(path) for path in io.glob("brkitr/rules/*.txt")]
    brk_files = [
        OutFile("brkitr/%s.brk" % rule.filename[dir_len:-ext_len])
        for rule in rule_files
    ]
    tool_args = (
        "-d {OUT_DIR} -i {OUT_DIR} "
        "-c -r {IN_DIR}/{INPUT_FILE} "
        "-o {OUTPUT_FILE}"
    )
    return [
        RepeatedExecutionRequest(
            name = "brkitr_brk",
            category = "brkitr_rules",
            dep_targets = [DepTarget("cnvalias"), DepTarget("ulayout")],
            input_files = rule_files,
            output_files = brk_files,
            tool = IcuTool("genbrk"),
            args = tool_args,
            format_with = {},
            repeat_with = {}
        )
    ]
197
198
def generate_stringprep(config, io, common_vars):
    """Return the request that builds the StringPrep profile (.spp) files.

    Each file matching ``sprep/*.txt`` is an input to ``gensprep``. The
    bundle name is the file's base name without extension; it names both the
    output (``<bundle>.spp``) and the ``BUNDLE_NAME`` repeat variable. The
    request depends on ``unidata/NormalizationCorrections.txt``. ``config``
    and ``common_vars`` are not used by this function.

    Returns:
        A one-element list holding the "stringprep" RepeatedExecutionRequest.
    """
    # SPP FILES
    dir_len = len("sprep/")
    ext_len = len(".txt")
    profile_files = [InFile(path) for path in io.glob("sprep/*.txt")]
    bundle_names = [src.filename[dir_len:-ext_len] for src in profile_files]
    spp_files = [OutFile("%s.spp" % bundle) for bundle in bundle_names]
    tool_args = (
        "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} "
        "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt"
    )
    return [
        RepeatedExecutionRequest(
            name = "stringprep",
            category = "stringprep",
            dep_targets = [InFile("unidata/NormalizationCorrections.txt")],
            input_files = profile_files,
            output_files = spp_files,
            tool = IcuTool("gensprep"),
            args = tool_args,
            format_with = {},
            repeat_with = {
                "BUNDLE_NAME": bundle_names
            }
        )
    ]
220
221
def generate_brkitr_dictionaries(config, io, common_vars):
    """Return the request that builds the break iterator dictionary files.

    Each file matching ``brkitr/dictionaries/*.txt`` is an input to
    ``gendict``; its output is ``brkitr/<base name>.dict``. Per-file command
    line options come from a fixed table keyed by input path. ``config`` and
    ``common_vars`` are not used by this function.

    Returns:
        A one-element list holding the "dictionaries"
        RepeatedExecutionRequest.

    Raises:
        KeyError: If a globbed dictionary file has no entry in the options
            table.
    """
    # Dict Files
    dir_len = len("brkitr/dictionaries/")
    ext_len = len(".txt")
    dict_sources = [
        InFile(path) for path in io.glob("brkitr/dictionaries/*.txt")
    ]
    dict_outputs = [
        OutFile("brkitr/%s.dict" % src.filename[dir_len:-ext_len])
        for src in dict_sources
    ]
    options_by_source = {
        "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000",
        "brkitr/dictionaries/cjdict.txt": "--uchars",
        "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780",
        "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80",
        "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00"
    }
    # One options string per input, in the same order as dict_sources.
    per_file_options = [options_by_source[src.filename] for src in dict_sources]
    tool_args = (
        "-i {OUT_DIR} "
        "-c {EXTRA_OPTIONS} "
        "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}"
    )
    return [
        RepeatedExecutionRequest(
            name = "dictionaries",
            category = "brkitr_dictionaries",
            dep_targets = [],
            input_files = dict_sources,
            output_files = dict_outputs,
            tool = IcuTool("gendict"),
            args = tool_args,
            format_with = {},
            repeat_with = {
                "EXTRA_OPTIONS": per_file_options
            }
        )
    ]
251
252
def generate_normalization(config, io, common_vars):
    """Return the request that repackages the normalization (.nrm) files.

    Every file matching ``in/*.nrm`` except ``in/nfc.nrm`` is passed through
    ``icupkg``; the output keeps the file name without the ``in/`` prefix.
    ``config`` and ``common_vars`` are not used by this function.

    Returns:
        A one-element list holding the "normalization"
        RepeatedExecutionRequest.

    Raises:
        ValueError: If ``in/nfc.nrm`` is not among the globbed files.
    """
    # NRM Files
    nrm_sources = [InFile(path) for path in io.glob("in/*.nrm")]
    # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data
    nrm_sources.remove(InFile("in/nfc.nrm"))
    dir_len = len("in/")
    nrm_outputs = [OutFile(src.filename[dir_len:]) for src in nrm_sources]
    return [
        RepeatedExecutionRequest(
            name = "normalization",
            category = "normalization",
            dep_targets = [],
            input_files = nrm_sources,
            output_files = nrm_outputs,
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with = {},
            repeat_with = {}
        )
    ]
272
273
def generate_coll_ucadata(config, io, common_vars):
    """Return the request that produces ``coll/ucadata.icu``.

    The input is ``in/coll/ucadata-<config.coll_han_type>.icu``, passed
    through ``icupkg``. ``io`` and ``common_vars`` are not used by this
    function.

    Returns:
        A one-element list holding the "coll_ucadata" SingleExecutionRequest.
    """
    # Collation Dependency File (ucadata.icu)
    uca_source = InFile("in/coll/ucadata-%s.icu" % config.coll_han_type)
    uca_output = OutFile("coll/ucadata.icu")
    uca_request = SingleExecutionRequest(
        name = "coll_ucadata",
        category = "coll_ucadata",
        dep_targets = [],
        input_files = [uca_source],
        output_files = [uca_output],
        tool = IcuTool("icupkg"),
        args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with = {}
    )
    return [uca_request]
290
291
def generate_full_unicore_data(config, io, common_vars):
    """Return the request for the core Unicode data files, if enabled.

    When ``config.include_uni_core_data`` is false, no request is produced.
    Otherwise five fixed files under ``in/`` are passed through ``icupkg``
    and written under the same base names. ``io`` and ``common_vars`` are
    not used by this function.

    Returns:
        An empty list, or a one-element list holding the "unicore"
        RepeatedExecutionRequest.
    """
    # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)
    # are hardcoded in the common DLL and therefore not included in the data package any more.
    # They are not built by default but need to be built for ICU4J data,
    # both in the .jar and in the .dat file (if ICU4J uses the .dat file).
    # See ICU-4497.
    if not config.include_uni_core_data:
        return []

    core_names = [
        "pnames.icu",
        "uprops.icu",
        "ucase.icu",
        "ubidi.icu",
        "nfc.nrm"
    ]
    core_sources = [InFile("in/%s" % name) for name in core_names]
    core_outputs = [OutFile(name) for name in core_names]
    unicore_request = RepeatedExecutionRequest(
        name = "unicore",
        category = "unicore",
        input_files = core_sources,
        output_files = core_outputs,
        tool = IcuTool("icupkg"),
        args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}"
    )
    return [unicore_request]
320
321
def generate_unames(config, io, common_vars):
    """Return the request that produces ``unames.icu``.

    ``in/unames.icu`` is passed through ``icupkg``. None of the three
    parameters is used by this function.

    Returns:
        A one-element list holding the "unames" SingleExecutionRequest.
    """
    # Unicode Character Names
    unames_request = SingleExecutionRequest(
        name = "unames",
        category = "unames",
        dep_targets = [],
        input_files = [InFile("in/unames.icu")],
        output_files = [OutFile("unames.icu")],
        tool = IcuTool("icupkg"),
        args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with = {}
    )
    return [unames_request]
338
339
def generate_ulayout(config, io, common_vars):
    """Return the request that produces ``ulayout.icu``.

    ``in/ulayout.icu`` is passed through ``icupkg``. The request's name and
    category are both "ulayout". None of the three parameters is used by
    this function.

    Returns:
        A one-element list holding the "ulayout" SingleExecutionRequest.
    """
    # Unicode text layout properties
    stem = "ulayout"
    layout_source = InFile("in/%s.icu" % stem)
    layout_output = OutFile("%s.icu" % stem)
    layout_request = SingleExecutionRequest(
        name = stem,
        category = stem,
        dep_targets = [],
        input_files = [layout_source],
        output_files = [layout_output],
        tool = IcuTool("icupkg"),
        args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with = {}
    )
    return [layout_request]
357
358
def generate_misc(config, io, common_vars):
    """Return the request that builds the miscellaneous resource files.

    Each file matching ``misc/*.txt`` is an input to ``genrb``; its output
    is ``<base name>.res`` with no directory prefix. The request depends on
    the "cnvalias" target. ``config`` and ``common_vars`` are not used by
    this function.

    Returns:
        A one-element list holding the "misc_res" RepeatedExecutionRequest.
    """
    # Misc Data Res Files
    dir_len = len("misc/")
    ext_len = len(".txt")
    misc_sources = [InFile(path) for path in io.glob("misc/*.txt")]
    misc_basenames = [src.filename[dir_len:] for src in misc_sources]
    misc_outputs = [
        OutFile("%s.res" % basename[:-ext_len])
        for basename in misc_basenames
    ]
    tool_args = (
        "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} "
        "-k -q "
        "{INPUT_BASENAME}"
    )
    return [
        RepeatedExecutionRequest(
            name = "misc_res",
            category = "misc",
            dep_targets = [DepTarget("cnvalias")], # ICU-21175
            input_files = misc_sources,
            output_files = misc_outputs,
            tool = IcuTool("genrb"),
            args = tool_args,
            format_with = {},
            repeat_with = {
                "INPUT_BASENAME": misc_basenames
            }
        )
    ]
381
382
def generate_curr_supplemental(config, io, common_vars):
    """Return the request that builds ``curr/supplementalData.res``.

    One ``genrb`` invocation takes ``curr/supplementalData.txt`` as input.
    None of the three parameters is used by this function.

    Returns:
        A one-element list holding the "curr_supplemental_res"
        SingleExecutionRequest.
    """
    # Currency Supplemental Res File
    tool_args = (
        "-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} "
        "-k "
        "{INPUT_BASENAME}"
    )
    supplemental_request = SingleExecutionRequest(
        name = "curr_supplemental_res",
        category = "curr_supplemental",
        dep_targets = [],
        input_files = [InFile("curr/supplementalData.txt")],
        output_files = [OutFile("curr/supplementalData.res")],
        tool = IcuTool("genrb"),
        args = tool_args,
        format_with = {
            "INPUT_BASENAME": "supplementalData.txt"
        }
    )
    return [supplemental_request]
404
405
def generate_zone_supplemental(config, io, common_vars):
    """Return the request that builds ``zone/tzdbNames.res``.

    One ``genrb`` invocation takes ``zone/tzdbNames.txt`` as input. None of
    the three parameters is used by this function.

    Returns:
        A one-element list holding the "zone_supplemental_res"
        SingleExecutionRequest.
    """
    # tzdbNames Res File
    tool_args = (
        "-s {IN_DIR}/zone -d {OUT_DIR}/zone -i {OUT_DIR} "
        "-k "
        "{INPUT_BASENAME}"
    )
    tzdb_request = SingleExecutionRequest(
        name = "zone_supplemental_res",
        category = "zone_supplemental",
        dep_targets = [],
        input_files = [InFile("zone/tzdbNames.txt")],
        output_files = [OutFile("zone/tzdbNames.res")],
        tool = IcuTool("genrb"),
        args = tool_args,
        format_with = {
            "INPUT_BASENAME": "tzdbNames.txt"
        }
    )
    return [tzdb_request]
427
428
def generate_translit(config, io, common_vars):
    """Return the request that builds the transliteration resource files.

    Only ``root.txt``, ``en.txt`` and ``el.txt`` under ``translit/`` are
    inputs to ``genrb``. Every other file matching ``translit/*.txt`` is
    attached, in sorted order, as a dependency of the request. ``config``
    and ``common_vars`` are not used by this function.

    Returns:
        A one-element list holding the "translit_res"
        RepeatedOrSingleExecutionRequest.
    """
    compiled_sources = [
        InFile("translit/root.txt"),
        InFile("translit/en.txt"),
        InFile("translit/el.txt")
    ]
    # All remaining translit/*.txt files are dependencies, not inputs.
    every_source = set(InFile(path) for path in io.glob("translit/*.txt"))
    other_sources = sorted(every_source - set(compiled_sources))
    dir_len = len("translit/")
    ext_len = len(".txt")
    compiled_basenames = [src.filename[dir_len:] for src in compiled_sources]
    compiled_outputs = [
        OutFile("translit/%s.res" % basename[:-ext_len])
        for basename in compiled_basenames
    ]
    tool_args = (
        "-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} "
        "-k "
        "{INPUT_BASENAME}"
    )
    return [
        RepeatedOrSingleExecutionRequest(
            name = "translit_res",
            category = "translit",
            dep_targets = other_sources,
            input_files = compiled_sources,
            output_files = compiled_outputs,
            tool = IcuTool("genrb"),
            args = tool_args,
            format_with = {},
            repeat_with = {
                "INPUT_BASENAME": utils.SpaceSeparatedList(compiled_basenames)
            }
        )
    ]
461
462
def generate_tree(
        config,
        io,
        common_vars,
        sub_dir,
        out_sub_dir,
        use_pool_bundle,
        dep_targets):
    """Return the requests that build one resource bundle tree.

    Args:
        config: Not used by this function.
        io: Object providing ``glob()`` and ``read_locale_deps()``.
        common_vars: Mapping expanded as keyword arguments to ``str.format``;
            it must supply ``INDEX_NAME``, and ``OUT_DIR`` when
            ``use_pool_bundle`` is true.
        sub_dir: Input sub-directory; its ``*.txt`` files are the tree's
            inputs.
        out_sub_dir: Output sub-directory, or a false value to place outputs
            without a directory prefix.
        use_pool_bundle: When true, an additional request writes
            ``pool.res`` and the tree request is given ``--usePoolBundle``.
        dep_targets: Dependencies attached to the pool and tree requests.
            The list is not modified.

    Returns:
        A list with the optional "<sub_dir>_pool_write" request, then the
        "<sub_dir>_res" request, then the "<sub_dir>_index_txt" IndexRequest.
    """
    requests = []
    category = "%s_tree" % sub_dir
    out_prefix = ("%s/" % out_sub_dir) if out_sub_dir else ""

    tree_sources = [
        InFile(path) for path in io.glob("%s/*.txt" % sub_dir)
    ]
    # These files live in the tree's directory but are built by their own
    # requests (generate_curr_supplemental / generate_zone_supplemental).
    built_separately = {
        "curr": "curr/supplementalData.txt",
        "zone": "zone/tzdbNames.txt"
    }
    if sub_dir in built_separately:
        tree_sources.remove(InFile(built_separately[sub_dir]))

    dir_len = len(sub_dir) + 1
    ext_len = len(".txt")
    tree_basenames = [src.filename[dir_len:] for src in tree_sources]
    tree_outputs = [
        OutFile("%s%s.res" % (out_prefix, basename[:-ext_len]))
        for basename in tree_basenames
    ]

    # Generate Pool Bundle
    tree_deps = dep_targets
    use_pool_bundle_option = ""
    if use_pool_bundle:
        pool_outputs = [OutFile("%spool.res" % out_prefix)]
        pool_target_name = "%s_pool_write" % sub_dir
        use_pool_bundle_option = "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format(
            OUT_PREFIX = out_prefix,
            **common_vars
        )
        pool_args = (
            "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
            "--writePoolBundle -k "
            "{INPUT_BASENAMES_SPACED}"
        )
        requests.append(
            SingleExecutionRequest(
                name = pool_target_name,
                category = category,
                dep_targets = dep_targets,
                input_files = tree_sources,
                output_files = pool_outputs,
                tool = IcuTool("genrb"),
                args = pool_args,
                format_with = {
                    "IN_SUB_DIR": sub_dir,
                    "OUT_PREFIX": out_prefix,
                    "INPUT_BASENAMES_SPACED": utils.SpaceSeparatedList(tree_basenames)
                }
            )
        )
        # The tree itself must wait for the pool bundle; build a new list so
        # the caller's dep_targets is left untouched.
        tree_deps = dep_targets + [DepTarget(pool_target_name)]

    # Generate Res File Tree
    # BEGIN android-changed
    collation_option = "--omitCollationRules " if sub_dir == "coll" else ""
    tree_args = (
        "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
        + collation_option
        + "{EXTRA_OPTION} -k {INPUT_BASENAME}"
    )
    # END android-changed
    requests.append(
        RepeatedOrSingleExecutionRequest(
            name = "%s_res" % sub_dir,
            category = category,
            dep_targets = tree_deps,
            input_files = tree_sources,
            output_files = tree_outputs,
            tool = IcuTool("genrb"),
            args = tree_args,
            format_with = {
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix,
                "EXTRA_OPTION": use_pool_bundle_option
            },
            repeat_with = {
                "INPUT_BASENAME": utils.SpaceSeparatedList(tree_basenames)
            }
        )
    )

    # Generate res_index file
    # Exclude the deprecated locale variants and root; see ICU-20628. This
    # could be data-driven, but we do not want to perform I/O in this script
    # (for example, we do not want to read from an XML file).
    excluded_locales = {
        "ja_JP_TRADITIONAL",
        "th_TH_TRADITIONAL",
        "de_",
        "de__PHONEBOOK",
        "es_",
        "es__TRADITIONAL",
        "root",
    }
    # Put alias locales in a separate structure; see ICU-20627
    dependency_data = io.read_locale_deps(sub_dir)
    alias_locales = (
        set(dependency_data["aliases"].keys())
        if "aliases" in dependency_data
        else set()
    )
    alias_files = []
    installed_files = []
    for source in tree_sources:
        stem = IndexRequest.locale_file_stem(source)
        if stem in excluded_locales:
            continue
        if stem in alias_locales:
            alias_files.append(source)
        else:
            installed_files.append(source)

    # Only the "locales" tree passes a CLDR version to its index request.
    if sub_dir == "locales":
        cldr_version = dependency_data["cldrVersion"]
    else:
        cldr_version = None

    index_file_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format(
        IN_SUB_DIR = sub_dir,
        **common_vars
    ))
    index_res_file = OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format(
        OUT_PREFIX = out_prefix,
        **common_vars
    ))
    index_args = (
        "-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
        "-k "
        "{INDEX_NAME}.txt"
    )
    requests.append(
        IndexRequest(
            name = "%s_index_txt" % sub_dir,
            category = category,
            installed_files = installed_files,
            alias_files = alias_files,
            txt_file = index_file_txt,
            output_file = index_res_file,
            cldr_version = cldr_version,
            args = index_args,
            format_with = {
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix
            }
        )
    )

    return requests
598