1# 2# ElementTree 3# $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $ 4# 5# limited xinclude support for element trees 6# 7# history: 8# 2003-08-15 fl created 9# 2003-11-14 fl fixed default loader 10# 11# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. 12# 13# fredrik@pythonware.com 14# http://www.pythonware.com 15# 16# -------------------------------------------------------------------- 17# The ElementTree toolkit is 18# 19# Copyright (c) 1999-2008 by Fredrik Lundh 20# 21# By obtaining, using, and/or copying this software and/or its 22# associated documentation, you agree that you have read, understood, 23# and will comply with the following terms and conditions: 24# 25# Permission to use, copy, modify, and distribute this software and 26# its associated documentation for any purpose and without fee is 27# hereby granted, provided that the above copyright notice appears in 28# all copies, and that both that copyright notice and this permission 29# notice appear in supporting documentation, and that the name of 30# Secret Labs AB or the author not be used in advertising or publicity 31# pertaining to distribution of the software without specific, written 32# prior permission. 33# 34# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 35# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 36# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 37# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 38# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 39# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 40# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 41# OF THIS SOFTWARE. 42# -------------------------------------------------------------------- 43 44# Licensed to PSF under a Contributor Agreement. 45# See http://www.python.org/psf/license for licensing details. 46 47## 48# Limited XInclude support for the ElementTree package. 
##

import copy
from . import ElementTree
from urllib.parse import urljoin

XINCLUDE = "{http://www.w3.org/2001/XInclude}"

XINCLUDE_INCLUDE = XINCLUDE + "include"
XINCLUDE_FALLBACK = XINCLUDE + "fallback"

# For security reasons, the inclusion depth is limited to this read-only value by default.
DEFAULT_MAX_INCLUSION_DEPTH = 6


##
# Fatal include error.

class FatalIncludeError(SyntaxError):
    pass


##
# Fatal include error raised when the maximum inclusion depth is reached.

class LimitedRecursiveIncludeError(FatalIncludeError):
    pass


##
# Default loader. This loader reads an included resource from disk.
#
# @param href Resource reference.
# @param parse Parse mode.  Either "xml" or "text".
# @param encoding Optional text encoding (UTF-8 by default for "text").
# @return The expanded resource.  If the parse mode is "xml", this
#    is the root Element of the parsed document.  Any other parse mode
#    is read as text, and the result is a Unicode string.  A loader
#    that fails can return None or raise an OSError exception.
# @throws OSError If the loader fails to load the resource.

def default_loader(href, parse, encoding=None):
    """Read the resource *href* from disk and return it.

    For ``parse == "xml"`` the file is parsed and its root element is
    returned; otherwise the file content is returned as a string decoded
    with *encoding* (UTF-8 when *encoding* is empty or None).
    """
    if parse == "xml":
        # Binary mode: the XML parser works out the encoding itself.
        with open(href, 'rb') as file:
            data = ElementTree.parse(file).getroot()
    else:
        if not encoding:
            encoding = 'UTF-8'
        with open(href, 'r', encoding=encoding) as file:
            data = file.read()
    return data

##
# Expand XInclude directives.
#
# @param elem Root element, or an ElementTree instance whose root
#     element is used.
# @param loader Optional resource loader.  If omitted, it defaults
#     to {@link default_loader}.  If given, it should be a callable
#     that implements the same interface as <b>default_loader</b>.
# @param base_url The base URL of the original file, to resolve
#     relative include file references.
# @param max_depth The maximum number of recursive inclusions.
#     Limited to reduce the risk of malicious content explosion.
#     Pass None to disable the limitation.  Negative values are
#     rejected with a ValueError.
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
# @throws FatalIncludeError If the function fails to include a given
#     resource, or if the tree contains malformed XInclude elements.
# @throws OSError If the loader fails to load a given resource.
# @throws ValueError If max_depth is negative.
# @returns None.  The tree is modified in place.

def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand the XInclude directives below *elem* in place.

    *elem* is an element, or any object with a ``getroot()`` method (such
    as an ElementTree), in which case its root element is processed.
    *loader* is called as ``loader(href, parse)`` for XML includes and
    ``loader(href, parse, encoding)`` for text includes; it defaults to
    ``default_loader``.  *base_url*, when given, is joined with each
    ``href`` before loading.  *max_depth* bounds the nesting of XML
    includes; ``None`` removes the bound and a negative value raises
    ValueError.  Returns None.
    """
    if max_depth is None:
        # -1 never reaches the "== 0" check in _include(), so the depth
        # is effectively unlimited.
        max_depth = -1
    elif max_depth < 0:
        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

    if hasattr(elem, 'getroot'):
        elem = elem.getroot()
    if loader is None:
        loader = default_loader

    _include(elem, loader, base_url, max_depth, set())


def _include(elem, loader, base_url, max_depth, _parent_hrefs):
    """Recursively expand xi:include children of *elem* in place.

    *max_depth* is the number of nested XML inclusions still allowed
    (a negative value means unlimited).  *_parent_hrefs* is the set of
    hrefs currently being expanded higher up the call chain; it is used
    to detect recursive includes.

    Raises FatalIncludeError for a missing href, an unknown parse mode,
    a stray xi:fallback element, a recursive include, or a loader that
    returns None, and LimitedRecursiveIncludeError when *max_depth* is
    exhausted.
    """
    # look for xinclude elements
    i = 0
    while i < len(elem):
        e = elem[i]
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = e.get("href")
            if href is None:
                # Same-document (xpointer-only) includes are not supported.
                # Fail clearly here: otherwise the loader would be handed
                # None, or urljoin() would resolve the missing href to
                # base_url and re-include the base document.
                raise FatalIncludeError(
                    "xi:include tag is missing the 'href' attribute"
                    )
            if base_url:
                href = urljoin(base_url, href)
            parse = e.get("parse", "xml")
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError("recursive include of %s" % href)
                if max_depth == 0:
                    raise LimitedRecursiveIncludeError(
                        "maximum xinclude depth reached when including file %s" % href)
                _parent_hrefs.add(href)
                node = loader(href, parse)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                node = copy.copy(node)  # FIXME: this makes little sense with recursive includes
                # The included document becomes the base for its own includes.
                _include(node, loader, href, max_depth - 1, _parent_hrefs)
                _parent_hrefs.remove(href)
                if e.tail:
                    # Keep the text that followed the xi:include element.
                    node.tail = (node.tail or "") + e.tail
                elem[i] = node
            elif parse == "text":
                text = loader(href, parse, e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                if e.tail:
                    text += e.tail
                # Included text has no element of its own: append it to the
                # previous sibling's tail, or to the parent's text when the
                # directive is the first child.
                if i:
                    node = elem[i-1]
                    node.tail = (node.tail or "") + text
                else:
                    elem.text = (elem.text or "") + text
                del elem[i]
                # The next child has moved into slot i; do not advance.
                continue
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                )
        elif e.tag == XINCLUDE_FALLBACK:
            raise FatalIncludeError(
                "xi:fallback tag must be child of xi:include (%r)" % e.tag
                )
        else:
            _include(e, loader, base_url, max_depth, _parent_hrefs)
        i += 1