#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

'''Fast and efficient parser for XTB files.
'''


import sys
import xml.sax
import xml.sax.handler

import grit.node.base


class XtbContentHandler(xml.sax.handler.ContentHandler):
  '''A content handler that calls a given callback function for each
  translation in the XTB file.

  The callback is invoked as callback(msg_id, parts) when a <translation>
  element is closed, where 'parts' is a list of (is_placeholder, text) tuples.
  Translations enclosed in an <if expr="..."> element are only reported when
  the expression evaluates to a true value.
  '''

  def __init__(self, callback, defs=None, debug=False, target_platform=None):
    '''Initializes the handler.

    Args:
      callback: def Callback(msg_id, parts): pass
      defs: None, or a dictionary of defines used when evaluating <if>
            expressions.
      debug: Stored on the instance; not otherwise used by this class.
      target_platform: None, or a sys.platform-like identifier.  Defaults to
                       sys.platform when not given.
    '''
    # Explicit base-class call (rather than super()) so this also works where
    # ContentHandler is an old-style class.
    xml.sax.handler.ContentHandler.__init__(self)
    self.callback = callback
    self.debug = debug
    # 0 if we are not currently parsing a translation, otherwise the message
    # ID of that translation.
    self.current_id = 0
    # Empty if we are not currently parsing a translation, otherwise the
    # parts we have for that translation - a list of tuples
    # (is_placeholder, text)
    self.current_structure = []
    # Set to the language ID when we see the <translationbundle> node.
    self.language = ''
    # Keep track of the if block we're inside.  We can't nest ifs.
    self.if_expr = None
    # Root defines to be used with if expr.
    self.defines = defs or {}
    # Target platform for build.
    self.target_platform = target_platform or sys.platform

  def startElement(self, name, attrs):
    '''Records the state implied by an opening tag.

    <translation> starts a new message, <ph> appends a placeholder part to
    the current message, <translationbundle> supplies the language and <if>
    sets the condition applied to the translations it contains.
    '''
    if name == 'translation':
      assert self.current_id == 0 and len(self.current_structure) == 0, (
          "Didn't expect a <translation> element here.")
      self.current_id = attrs.getValue('id')
    elif name == 'ph':
      assert self.current_id != 0, "Didn't expect a <ph> element here."
      self.current_structure.append((True, attrs.getValue('name')))
    elif name == 'translationbundle':
      self.language = attrs.getValue('lang')
    elif name in ('if', 'then', 'else'):
      assert self.if_expr is None, "Can't nest <if> or use <else> in xtb files"
      self.if_expr = attrs.getValue('expr')

  def endElement(self, name):
    '''Finishes a <translation> (reporting it via the callback) or an <if>.'''
    if name == 'translation':
      assert self.current_id != 0

      # If we're in an if block, only call the callback (add the translation)
      # if the expression is True.
      should_run_callback = True
      if self.if_expr:
        should_run_callback = grit.node.base.Node.EvaluateExpression(
            self.if_expr, self.defines, self.target_platform)
      if should_run_callback:
        self.callback(self.current_id, self.current_structure)

      # Reset the per-translation state.  A fresh list is created (instead of
      # clearing in place) because the callback was handed the old one.
      self.current_id = 0
      self.current_structure = []
    elif name == 'if':
      assert self.if_expr is not None
      self.if_expr = None

  def characters(self, content):
    '''Appends character data to the translation currently being parsed.

    Character data outside of a <translation> element is ignored.
    '''
    if self.current_id != 0:
      # We are inside a <translation> node so just add the characters to our
      # structure.
      #
      # This naive way of handling characters is OK because in the XTB format,
      # <ph> nodes are always empty (always <ph name="XXX"/>) and whitespace
      # inside the <translation> node should be preserved.
      self.current_structure.append((False, content))


class XtbErrorHandler(xml.sax.handler.ErrorHandler):
  '''An error handler that ignores recoverable errors and warnings and
  re-raises fatal errors.

  Nothing in this file installs this handler; Parse() relies on the default
  error handling of xml.sax.parse().
  '''

  def error(self, exception):
    # Recoverable errors are deliberately ignored.
    pass

  def fatalError(self, exception):
    raise exception

  def warning(self, exception):
    # Warnings are deliberately ignored.
    pass


def Parse(xtb_file, callback_function, defs=None, debug=False,
          target_platform=None):
  '''Parse xtb_file, making a call to callback_function for every translation
  in the XTB file.

  The callback function must have the signature as described below.  The 'parts'
  parameter is a list of tuples (is_placeholder, text).  The 'text' part is
  either the raw text (if is_placeholder is False) or the name of the placeholder
  (if is_placeholder is True).

  Args:
    xtb_file:           open('fr.xtb')
    callback_function:  def Callback(msg_id, parts): pass
    defs:               None, or a dictionary of preprocessor definitions.
    debug:              Default False. Set True for verbose debug output.
    target_platform:    None, or a sys.platform-like identifier of the build
                        target platform.

  Return:
    The language of the XTB, e.g. 'fr'
  '''
  # Start by advancing the file pointer past the DOCTYPE thing, as the TC
  # uses a path to the DTD that only works in Unix.
  # TODO(joi) Remove this ugly hack by getting the TC gang to change the
  # XTB files somehow?
  front_of_file = xtb_file.read(1024)
  # Match the needle's type to whatever read() returned so the search works
  # for files opened in binary mode as well as in text mode.
  if isinstance(front_of_file, bytes):
    bundle_tag = b'<translationbundle'
  else:
    bundle_tag = '<translationbundle'
  # find() returns -1 when the tag is not within the first 1024 units; in that
  # case rewind to the start of the file rather than attempting seek(-1).
  xtb_file.seek(max(front_of_file.find(bundle_tag), 0))

  handler = XtbContentHandler(callback=callback_function, defs=defs,
                              debug=debug, target_platform=target_platform)
  xml.sax.parse(xtb_file, handler)
  assert handler.language != ''
  return handler.language