1# Step 02 - parse RFC.
2#
3# Static dictionary is described in "Appendix A" section in a hexadecimal form.
4# This tool locates dictionary data in RFC and converts it to raw binary format.
5
6import re
7
# Input: plain-text copy of the RFC. Output: raw dictionary bytes.
rfc_path = "rfc7932.txt"
bin_path = "dictionary.bin"

# Total number of dictionary bytes expected from Appendix A; parsing stops
# once this many have been collected, and anything less is treated as an error.
expected_size = 122784

with open(rfc_path, "r") as rfc:
  lines = rfc.readlines()

# A data line is exactly six spaces followed by 64 lowercase hex digits
# (32 bytes). Every other line inside the appendix is skipped.
re_data_line = re.compile(r"^      [0-9a-f]{64}$")

appendix_a_found = False
dictionary = bytearray()
for line in lines:
  if not appendix_a_found:
    # Only a heading that starts at column 0 switches the parser on, so
    # indented mentions of the appendix earlier in the text are ignored.
    appendix_a_found = line.startswith("Appendix A.")
    continue
  if re_data_line.match(line) is None:
    continue
  # strip() drops the six-space indent and the trailing newline, leaving
  # only the hex digits.
  dictionary.extend(bytearray.fromhex(line.strip()))
  if len(dictionary) == expected_size:
    break

# Refuse to produce an empty or truncated dictionary file: fail loudly
# instead of reporting success on bad input.
if not appendix_a_found:
  raise SystemExit("\"Appendix A.\" heading not found in " + rfc_path)
if len(dictionary) != expected_size:
  raise SystemExit(
      "Expected " + str(expected_size) + " dictionary bytes but found "
      + str(len(dictionary)) + " in " + rfc_path
      + "; not writing " + bin_path)

with open(bin_path, "wb") as output:
  output.write(dictionary)

print("Parsed and saved " + str(len(dictionary)) + " bytes to " + bin_path)
35