#! /usr/bin/env python3
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>

"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file). This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to. If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

import os
import sys
import ast
import getopt
import struct
import array
from email.parser import HeaderParser

__version__ = "1.1"

# Maps encoded msgid (bytes) -> encoded msgstr (bytes) for the catalog that is
# currently being compiled.  Filled by add(), consumed by generate().
MESSAGES = {}


def usage(code, msg=''):
    """Print the module help text to stderr and exit.

    code -- process exit status passed to sys.exit().
    msg  -- optional extra message printed after the help text.
    """
    print(__doc__, file=sys.stderr)
    if msg:
        print(msg, file=sys.stderr)
    sys.exit(code)


def add(id, str, fuzzy):
    """Add a non-fuzzy translation to the dictionary.

    id    -- the message id (bytes).
    str   -- the translation (bytes); empty translations are ignored.
    fuzzy -- true if the entry is marked fuzzy, in which case it is ignored.
    """
    if not fuzzy and str:
        MESSAGES[id] = str


def generate():
    """Return the binary .mo catalog for the current MESSAGES as bytes.

    Layout: a header of seven 32-bit integers, the key index table, the
    value index table, all NUL-terminated keys, then all NUL-terminated
    values.  No hash table is emitted.  Integers are written in native byte
    order; the magic number lets readers detect which order was used.
    """
    # The keys are sorted in the .mo file.
    keys = sorted(MESSAGES)

    # For each string we need its size and its offset inside its own blob.
    # Each string is NUL terminated; the NUL does not count into the size.
    spans = []
    key_pos = value_pos = 0
    for key in keys:
        value = MESSAGES[key]
        spans.append((key_pos, len(key), value_pos, len(value)))
        key_pos += len(key) + 1
        value_pos += len(value) + 1
    ids = b''.join(key + b'\0' for key in keys)
    strs = b''.join(MESSAGES[key] + b'\0' for key in keys)

    # The header is 7 32-bit integers, followed by two index tables with one
    # (length, offset) pair of 8 bytes per entry each.  We don't use hash
    # tables, so the keys start right after the index tables ...
    keystart = 7 * 4 + 16 * len(keys)
    # ... and the values start after the keys.
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    for key_off, key_len, value_off, value_len in spans:
        koffsets += [key_len, key_off + keystart]
        voffsets += [value_len, value_off + valuestart]

    header = struct.pack("Iiiiiii",
                         0x950412de,             # Magic
                         0,                      # Version
                         len(keys),              # # of entries
                         7 * 4,                  # start of key index
                         7 * 4 + len(keys) * 8,  # start of value index
                         0, 0)                   # size and offset of hash table
    # array.tostring() was removed in Python 3.9; tobytes() is the
    # equivalent that exists on every Python 3 version.
    index = array.array("i", koffsets + voffsets).tobytes()
    return header + index + ids + strs


def make(filename, outfile):
    """Compile one .po file into a binary .mo file.

    filename -- input catalog; '.po' is appended if it is missing.
    outfile  -- output path, or None to derive it from the input name by
                replacing the extension with '.mo'.

    Exits the process with status 1 if the input cannot be read or contains
    a syntax error.  A failure to write the output is reported on stderr.
    """
    ID = 1
    STR = 2

    # Every catalog starts from an empty table; otherwise entries from a
    # previously compiled file would leak into this one.  Cleared in place so
    # that existing references to MESSAGES stay valid.
    MESSAGES.clear()

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as po_file:
            lines = po_file.readlines()
    except OSError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = None
    fuzzy = False

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, raw in enumerate(lines, 1):
        l = raw.decode(encoding)
        # If we get a comment line after a msgstr, this is a new entry
        if l[0] == '#' and section == STR:
            add(msgid, msgstr, fuzzy)
            section = None
            fuzzy = False
        # Record a fuzzy mark
        if l[:2] == '#,' and 'fuzzy' in l:
            fuzzy = True
        # Skip comments
        if l[0] == '#':
            continue
        # Now we are in a msgid section, output previous section
        if l.startswith('msgid') and not l.startswith('msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                # No comment separated the two entries, so a fuzzy mark seen
                # earlier belonged to the entry just stored, not to this one.
                fuzzy = False
                if not msgid:
                    # The empty msgid carries the catalog header; see whether
                    # there is an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(
                        msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d'
                      % (infile, lno), file=sys.stderr)
                sys.exit(1)
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith('msgstr'):
            section = STR
            if l.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d'
                          % (infile, lno), file=sys.stderr)
                    sys.exit(1)
                l = l.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on %s:%d'
                          % (infile, lno), file=sys.stderr)
                    sys.exit(1)
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        # What is left is a quoted string literal; literal_eval decodes its
        # escapes without executing arbitrary code.
        l = ast.literal_eval(l)
        if section == ID:
            msgid += l.encode(encoding)
        elif section == STR:
            msgstr += l.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(l, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as mo_file:
            mo_file.write(output)
    except OSError as msg:
        print(msg, file=sys.stderr)


def main():
    """Parse the command line and compile every .po file named on it."""
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        return

    for filename in args:
        make(filename, outfile)


if __name__ == '__main__':
    main()