1# Step 04 - generate Java literals.
2#
3# Java byte-code has ridiculous restrictions. There is no such thing as
4# "array literal" - those are implemented as series of data[x] = y;
5# as a consequence N-byte array will use 7N bytes in class, plus N bytes
6# in instantiated variable. Also no literal could be longer than 64KiB.
7#
8# To keep dictionary data compact both in source code and in compiled format
9# we use the following tricks:
10#  * use String as a data container
11#  * store only lowest 7 bits; i.e. all characters fit ASCII table; this allows
12#    efficient conversion to byte array; also ASCII characters use only 1 byte
#    of memory (UTF-8 encoding)
14#  * RLE-compress sequence of 8-th bits
15#
16# This script generates literals used in Java code.
17
bin_path = "dictionary.bin"

with open(bin_path, "rb") as raw:
  data = raw.read()

# Code point -> one-character text string. Python 2 needs unichr() because its
# chr() only accepts 0..255; Python 3 has no unichr() and its chr() accepts any
# code point.
try:
  to_char = unichr
except NameError:
  to_char = chr

low = []  # One character per input byte, holding only its lowest 7 bits.
hi = []  # Run lengths of the 8-th bit, one character per run.
is_skip = True  # True while the current run consists of bytes below 0x80.
# Bias added to every stored run length.
# NOTE(review): presumably chosen so that short runs map to characters that
# need no escaping - confirm against the Java code that reads SKIP_FLIP.
skip_flip_offset = 36
cntr = skip_flip_offset
# bytearray yields integers on both Python 2 and Python 3; iterating the raw
# read() result gives 1-char strings on Python 2 but integers on Python 3,
# where calling ord() on them raises TypeError.
for value in bytearray(data):
  low.append(chr(value & 0x7F))
  high_bit_set = value >= 0x80
  if high_bit_set != is_skip:
    # The byte belongs to the current run (a "skip" run holds bytes with the
    # 8-th bit clear, a "flip" run holds bytes with it set).
    cntr += 1
  else:
    # The byte starts a run of the opposite kind: emit the finished run and
    # count this byte as the first element of the new one.
    is_skip = not is_skip
    hi.append(to_char(cntr))
    cntr = skip_flip_offset + 1
# Emit the run that was still open when the input ended. For empty input this
# is a single zero-length "skip" run.
hi.append(to_char(cntr))

# Split the 7-bit payload into two halves.
# NOTE(review): presumably this keeps each Java literal below the 64KiB limit
# mentioned in the header comment - confirm for the actual dictionary size.
half = len(low) // 2
low0 = low[:half]
low1 = low[half:]
49
def escape(chars):
  """Translate characters into fragments of a Java string literal.

  Args:
    chars: iterable of single-character strings.

  Returns:
    A list with one string per input character, in input order. Carriage
    return, line feed, tab, double quote and backslash become their
    two-character backslash escapes; any other character with a code below 32
    or at/above 127 becomes a "\\uXXXX" escape (upper-case hex, at least four
    digits); every remaining character is passed through unchanged.
  """
  named = {
      "\r": "\\r",
      "\n": "\\n",
      "\t": "\\t",
      "\"": "\\\"",
      "\\": "\\\\",
  }
  pieces = []
  for ch in chars:
    fragment = named.get(ch)
    if fragment is None:
      code = ord(ch)
      # Printable ASCII is emitted verbatim; everything else is hex-escaped.
      fragment = ch if 32 <= code < 127 else "\\u%04X" % code
    pieces.append(fragment)
  return pieces
68
69
# Every literal is emitted as three fragments: the Java declaration up to the
# opening quote, the escaped payload, and the closing quote with terminator.
source_code = []
for declaration, payload in (
    ("  private static final String DATA0 = \"", low0),
    ("  private static final String DATA1 = \"", low1),
    ("  private static final String SKIP_FLIP = \"", hi),
):
  source_code += [declaration, "".join(escape(payload)), "\";\n"]

src_path = "DictionaryData.inc.java"

# Glue the fragments together and write them out in a single call.
java_text = "".join(source_code)
with open(src_path, "w") as source:
  source.write(java_text)
80